X-Git-Url: http://git.uio.no/git/?a=blobdiff_plain;f=SHUTTLE%2FAliShuttle.cxx;h=02a723f4e0bd2cab204aa03a01ae174018434be9;hb=955d2abc1fff41f135abce1b4c40cfbca0cb2ff4;hp=af34760896b6a509f701e5585efd4f21e1b59597;hpb=b0e53b15204dc0d07d3c8e9b1880ca989fc70814;p=u%2Fmrichter%2FAliRoot.git diff --git a/SHUTTLE/AliShuttle.cxx b/SHUTTLE/AliShuttle.cxx index af34760896b..02a723f4e0b 100644 --- a/SHUTTLE/AliShuttle.cxx +++ b/SHUTTLE/AliShuttle.cxx @@ -15,6 +15,36 @@ /* $Log$ +Revision 1.75 2007/12/18 15:42:14 jgrosseo +adding number of open runs to monitoring + +Revision 1.74 2007/12/17 03:23:32 jgrosseo +several bugfixes +added "empty preprocessor" as placeholder for Acorde in FDR + +Revision 1.73 2007/12/14 19:31:36 acolla +Sending email to DCS experts is temporarily commented + +Revision 1.72 2007/12/13 15:44:28 acolla +Run type added in mail sent to detector expert (eases understanding) + +Revision 1.71 2007/12/12 14:56:14 jgrosseo +sending shuttle_ignore to ML also in case of 0 events + +Revision 1.70 2007/12/12 13:45:35 acolla +Monalisa started in Collect() function. Alive message to monitor is sent at each Collect and every minute during preprocessor processing. + +Revision 1.69 2007/12/12 10:06:29 acolla +in AliShuttle.cxx: SHUTTLE logbook is updated in case of invalid run times: + +time_start==0 && time_end==0 + +logbook is NOT updated if time_start != 0 && time_end == 0, because it may mean that the run is still ongoing. + +Revision 1.68 2007/12/11 10:15:17 acolla +Added marking SHUTTLE=DONE for invalid runs +(invalid start time or end time) and runs with totalEvents < 1 + Revision 1.67 2007/12/07 19:14:36 acolla in AliShuttleTrigger: @@ -634,6 +664,7 @@ Bool_t AliShuttle::StoreOCDB(const TString& gridURI) Log("SHUTTLE", Form("StoreOCDB - %s: object %s has validity infinite but " "there are previous unprocessed runs!", fCurrentDetector.Data(), aLocId.GetPath().Data())); + result = kFALSE; continue; } @@ -832,7 +863,7 @@ Bool_t AliShuttle::StoreRunMetadataFile(const char* localFile, const char* gridF TString localBaseFolder = sto->GetBaseFolder(); // Build Run level folder - // folder = /alice/data/year/lhcPeriod/runNb/Raw + // folder = /alice/data/year/lhcPeriod/runNb/raw TString lhcPeriod = GetLHCPeriod(); @@ -842,7 +873,7 @@ Bool_t AliShuttle::StoreRunMetadataFile(const char* localFile, const char* gridF return 0; } - TString target = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/Raw/%s", + TString target = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/raw/%s", localBaseFolder.Data(), GetCurrentYear(), lhcPeriod.Data(), GetCurrentRun(), gridFileName); @@ -954,7 +985,7 @@ Bool_t AliShuttle::CopyFilesToGrid(const char* type) return 0; } - dir = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/Raw", + dir = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/raw", localBaseFolder.Data(), GetCurrentYear(), lhcPeriod.Data(), GetCurrentRun()); alienDir = dir(dir.Index("/alice/data/"), dir.Length()); @@ -1260,7 +1291,9 @@ void AliShuttle::SendMLInfo() mlList.Add(&mlStatus); mlList.Add(&mlRetryCount); - fMonaLisa->SendParameters(&mlList); + TString mlID; + mlID.Form("%d", GetCurrentRun()); + fMonaLisa->SendParameters(&mlList, mlID); } //______________________________________________________________________________________________ @@ -1410,9 +1443,6 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry) Log("SHUTTLE", Form("\t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^*", GetCurrentRun())); - // create ML instance that monitors this run - fMonaLisa = new TMonaLisaWriter(fConfig->GetMonitorHost(), fConfig->GetMonitorTable(), Form("%d", GetCurrentRun())); - // Send the information to ML TMonaLisaText mlStatus("SHUTTLE_status", "Processing"); TMonaLisaText mlRunType("SHUTTLE_runtype", Form("%s (%s)", entry->GetRunType(), entry->GetRunParameter("log"))); @@ -1421,7 +1451,9 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry) mlList.Add(&mlStatus); mlList.Add(&mlRunType); - fMonaLisa->SendParameters(&mlList); + TString mlID; + mlID.Form("%d", GetCurrentRun()); + fMonaLisa->SendParameters(&mlList, mlID); if (fLogbookEntry->IsDone()) { @@ -1485,20 +1517,27 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry) fgkMainRefStorage = Form("alien://folder=/alice/data/%d/%s/Reference?user=alidaq?cacheFold=/tmp/OCDBCache", GetCurrentYear(), lhcPeriod.Data()); - AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB); - if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun()); - AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage); - if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun()); - // Loop on detectors in the configuration TIter iter(fConfig->GetDetectors()); TObjString* aDetector = 0; + Bool_t first = kTRUE; + while ((aDetector = (TObjString*) iter.Next())) { fCurrentDetector = aDetector->String(); if (ContinueProcessing() == kFALSE) continue; + + if (first) + { + // only read QueryCDB when needed and only once + AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB); + if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun()); + AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage); + if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun()); + first = kFALSE; + } Log("SHUTTLE", Form("\t\t\t****** run %d - %s: START ******", GetCurrentRun(), aDetector->GetName())); @@ -1575,9 +1614,12 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry) } if (expiredTime % 60 == 0) + { Log("SHUTTLE", Form("Process - %s: Checking process. " "Run time: %d seconds - Memory consumption: %d KB", fCurrentDetector.Data(), expiredTime, mem)); + SendAlive(); + } if (mem > fConfig->GetPPMaxMem()) { @@ -1662,7 +1704,8 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry) if (success) // Preprocessor finished successfully! { // remove temporary folder - gSystem->Exec(Form("rm -rf %s",tmpDir.Data())); + // temporary commented (JF) + //gSystem->Exec(Form("rm -rf %s",tmpDir.Data())); // Update time_processed field in FXS DB if (UpdateTable() == kFALSE) @@ -1745,10 +1788,6 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry) } } - // remove ML instance - delete fMonaLisa; - fMonaLisa = 0; - fLogbookEntry = 0; return hasError == kFALSE; @@ -1831,8 +1870,8 @@ Bool_t AliShuttle::ProcessCurrentDetector() " Sending mail to DCS experts!", host.Data())); UpdateShuttleStatus(AliShuttleStatus::kDCSError); - if (!SendMailToDCS()) - Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!")); + //if (!SendMailToDCS()) + // Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!")); delete dcsMap; return kFALSE; @@ -1852,8 +1891,8 @@ Bool_t AliShuttle::ProcessCurrentDetector() " Sending mail to DCS experts!", host.Data())); UpdateShuttleStatus(AliShuttleStatus::kDCSError); - if (!SendMailToDCS()) - Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!")); + //if (!SendMailToDCS()) + // Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!")); if (aliasMap) delete aliasMap; delete dcsMap; @@ -1917,6 +1956,58 @@ Bool_t AliShuttle::ProcessCurrentDetector() return kTRUE; } +//______________________________________________________________________________________________ +void AliShuttle::CountOpenRuns() +{ + // Query DAQ's Shuttle logbook and sends the number of open runs to ML + + // check connection, in case connect + if (!Connect(3)) + return; + + TString sqlQuery; + sqlQuery = Form("select count(*) from %s where shuttle_done=0", fConfig->GetShuttlelbTable()); + + TSQLResult* aResult = fServer[3]->Query(sqlQuery); + if (!aResult) { + AliError(Form("Can't execute query <%s>!", sqlQuery.Data())); + return; + } + + AliDebug(2,Form("Query = %s", sqlQuery.Data())); + + if (aResult->GetRowCount() == 0) { + AliError(Form("No result for query %s received", sqlQuery.Data())); + return; + } + + if (aResult->GetFieldCount() != 1) { + AliError(Form("Invalid field count for query %s received", sqlQuery.Data())); + return; + } + + TSQLRow* aRow = aResult->Next(); + if (!aRow) { + AliError(Form("Could not receive result of query %s", sqlQuery.Data())); + return; + } + + TString result(aRow->GetField(0), aRow->GetFieldLength(0)); + Int_t count = result.Atoi(); + + Log("SHUTTLE", Form("%d unprocessed runs", count)); + + delete aRow; + delete aResult; + + TMonaLisaValue mlStatus("SHUTTLE_openruns", count); + + TList mlList; + mlList.Add(&mlStatus); + + fMonaLisa->SendParameters(&mlList, "__PROCESSINGINFO__"); +} + //______________________________________________________________________________________________ Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause, TObjArray& entries) @@ -1928,7 +2019,7 @@ Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause, entries.SetOwner(1); // check connection, in case connect - if(!Connect(3)) return kFALSE; + if (!Connect(3)) return kFALSE; TString sqlQuery; sqlQuery = Form("select * from %s %s order by run", fConfig->GetShuttlelbTable(), whereClause); @@ -2025,15 +2116,36 @@ AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run) UInt_t startTime = entry->GetStartTime(); UInt_t endTime = entry->GetEndTime(); - if (!startTime || !endTime || startTime > endTime) +// if (!startTime || !endTime || startTime > endTime) +// { +// Log("SHUTTLE", +// Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d. Skipping!", +// run, startTime, endTime)); +// +// Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run)); +// fLogbookEntry = entry; +// if (!UpdateShuttleLogbook("shuttle_done")) +// { +// AliError(Form("Could not update logbook for run %d !", run)); +// } +// fLogbookEntry = 0; +// +// delete entry; +// delete aRow; +// delete aResult; +// return 0; +// } + + if (!startTime) { Log("SHUTTLE", - Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d. Skipping!", - run, startTime, endTime)); + Form("QueryRunParameters - Invalid parameters for Run %d: " + "startTime = %d, endTime = %d. Skipping!", + run, startTime, endTime)); Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run)); fLogbookEntry = entry; - if (!UpdateShuttleLogbook("shuttle_done")) + if (!UpdateShuttleLogbook("shuttle_ignored")) { AliError(Form("Could not update logbook for run %d !", run)); } @@ -2045,6 +2157,50 @@ AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run) return 0; } + if (startTime && !endTime) + { + // TODO Here we don't mark SHUTTLE done, because this may mean + //the run is still ongoing!! + Log("SHUTTLE", + Form("QueryRunParameters - Invalid parameters for Run %d: " + "startTime = %d, endTime = %d. Skipping (Shuttle won't be marked as DONE)!", + run, startTime, endTime)); + + //Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run)); + //fLogbookEntry = entry; + //if (!UpdateShuttleLogbook("shuttle_done")) + //{ + // AliError(Form("Could not update logbook for run %d !", run)); + //} + //fLogbookEntry = 0; + + delete entry; + delete aRow; + delete aResult; + return 0; + } + + if (startTime && endTime && (startTime > endTime)) + { + Log("SHUTTLE", + Form("QueryRunParameters - Invalid parameters for Run %d: " + "startTime = %d, endTime = %d. Skipping!", + run, startTime, endTime)); + + Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run)); + fLogbookEntry = entry; + if (!UpdateShuttleLogbook("shuttle_ignored")) + { + AliError(Form("Could not update logbook for run %d !", run)); + } + fLogbookEntry = 0; + + delete entry; + delete aRow; + delete aResult; + return 0; + } + TString totEventsStr = entry->GetRunParameter("totalEvents"); Int_t totEvents = totEventsStr.Atoi(); if (totEvents < 1) @@ -2054,7 +2210,7 @@ AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run) Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run)); fLogbookEntry = entry; - if (!UpdateShuttleLogbook("shuttle_done")) + if (!UpdateShuttleLogbook("shuttle_ignored")) { AliError(Form("Could not update logbook for run %d !", run)); } @@ -2703,11 +2859,11 @@ Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status TString detName(detector); TString setClause; - if(detName == "shuttle_done") + if (detName == "shuttle_done" || detName == "shuttle_ignored") { setClause = "set shuttle_done=1"; - if (fMonaLisa) + if (detName == "shuttle_done") { // Send the information to ML TMonaLisaText mlStatus("SHUTTLE_status", "Done"); @@ -2715,7 +2871,9 @@ Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status TList mlList; mlList.Add(&mlStatus); - fMonaLisa->SendParameters(&mlList); + TString mlID; + mlID.Form("%d", GetCurrentRun()); + fMonaLisa->SendParameters(&mlList, mlID); } } else { TString statusStr(status); @@ -2877,6 +3035,19 @@ TString AliShuttle::GetLogFileName(const char* detector) const return fileName; } +//______________________________________________________________________________________________ +void AliShuttle::SendAlive() +{ + // sends alive message to ML + + TMonaLisaText mlStatus("SHUTTLE_status", "Alive"); + + TList mlList; + mlList.Add(&mlStatus); + + fMonaLisa->SendParameters(&mlList, "__PROCESSINGINFO__"); +} + //______________________________________________________________________________________________ Bool_t AliShuttle::Collect(Int_t run) { @@ -2894,6 +3065,13 @@ Bool_t AliShuttle::Collect(Int_t run) SetLastAction("Starting"); + // create ML instance + if (!fMonaLisa) + fMonaLisa = new TMonaLisaWriter(fConfig->GetMonitorHost(), fConfig->GetMonitorTable()); + + SendAlive(); + CountOpenRuns(); + TString whereClause("where shuttle_done=0"); if (run != -1) whereClause += Form(" and run=%d", run); @@ -3082,6 +3260,22 @@ Bool_t AliShuttle::SendMail() if (fTestMode != kNone) return kTRUE; + TString to=""; + TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector)); + TObjString *anExpert=0; + while ((anExpert = (TObjString*) iterExperts.Next())) + { + to += Form("%s,", anExpert->GetName()); + } + if (to.Length() > 0) + to.Remove(to.Length()-1); + AliDebug(2, Form("to: %s",to.Data())); + + if (to.IsNull()) { + Log("SHUTTLE", "List of detector responsibles not yet set!"); + return kFALSE; + } + void* dir = gSystem->OpenDirectory(GetShuttleLogDir()); if (dir == NULL) { @@ -3108,37 +3302,23 @@ Bool_t AliShuttle::SendMail() return kFALSE; } - TString to=""; - TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector)); - TObjString *anExpert=0; - while ((anExpert = (TObjString*) iterExperts.Next())) - { - to += Form("%s,", anExpert->GetName()); - } - to.Remove(to.Length()-1); - AliDebug(2, Form("to: %s",to.Data())); - - if (to.IsNull()) { - Log("SHUTTLE", "List of detector responsibles not yet set!"); - return kFALSE; - } - TString cc="alberto.colla@cern.ch"; - TString subject = Form("%s Shuttle preprocessor FAILED in run %d !", - fCurrentDetector.Data(), GetCurrentRun()); + TString subject = Form("%s Shuttle preprocessor FAILED in run %d (run type = %s)!", + fCurrentDetector.Data(), GetCurrentRun(), GetRunType()); AliDebug(2, Form("subject: %s", subject.Data())); TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data()); body += Form("SHUTTLE just detected that your preprocessor " - "failed processing run %d!!\n\n", GetCurrentRun()); + "failed processing run %d (run type = %s)!!\n\n", + GetCurrentRun(), GetRunType()); body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n", fCurrentDetector.Data()); if (fConfig->GetRunMode() == AliShuttleConfig::kTest) { body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n"); } else { - body += Form("\thttp://pcalimonitor.cern.ch/shuttle.jsp?instance=PROD?time=168 \n\n"); + body += Form("\thttp://pcalimonitor.cern.ch/shuttle.jsp?instance=PROD&time=168 \n\n"); }