From: jgrosseo Date: Fri, 21 Jul 2006 07:37:20 +0000 (+0000) Subject: last run is stored after each run X-Git-Url: http://git.uio.no/git/?p=u%2Fmrichter%2FAliRoot.git;a=commitdiff_plain;h=7bfb20903909b5071bf3fb12acf008f5e0bdf15b;ds=sidebyside last run is stored after each run --- diff --git a/SHUTTLE/AliShuttle.cxx b/SHUTTLE/AliShuttle.cxx index 7472423378d..d35c5d95742 100644 --- a/SHUTTLE/AliShuttle.cxx +++ b/SHUTTLE/AliShuttle.cxx @@ -15,6 +15,12 @@ /* $Log$ +Revision 1.10 2006/07/20 09:54:40 jgrosseo +introducing status management: The processing per subdetector is divided into several steps, +after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle +can keep track of the number of failures and skips further processing after a certain threshold is +exceeded. These thresholds can be configured in LDAP. + Revision 1.9 2006/07/19 10:09:55 jgrosseo new configuration, accesst to DAQ FES (Alberto) @@ -215,13 +221,9 @@ UInt_t AliShuttle::Store(const char* detector, Log(detector, "Error while storing object in main storage!"); AliError("local storage will be used!"); - AliCDBStorage *origStorage = AliCDBManager::Instance()->GetDefaultStorage(); - result = AliCDBManager::Instance()->GetStorage(fgkLocalUri) ->Put(object, id, metaData); - AliCDBManager::Instance()->SetDefaultStorage(origStorage); - if(result) { result = 2; }else{ @@ -243,13 +245,9 @@ AliShuttleStatus* AliShuttle::ReadShuttleStatus() fStatusEntry = 0; } - AliCDBStorage *origStorage = AliCDBManager::Instance()->GetDefaultStorage(); - fStatusEntry = AliCDBManager::Instance()->GetStorage(AliShuttle::GetLocalURI()) ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), fCurrentRun); - AliCDBManager::Instance()->SetDefaultStorage(origStorage); - if (!fStatusEntry) return 0; @@ -265,7 +263,7 @@ AliShuttleStatus* AliShuttle::ReadShuttleStatus() } //______________________________________________________________________________________________ -void AliShuttle::WriteShuttleStatus(AliShuttleStatus* status) +Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status) { // writes the status for one subdetector @@ -279,9 +277,15 @@ void AliShuttle::WriteShuttleStatus(AliShuttleStatus* status) fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData); - AliCDBStorage *origStorage = AliCDBManager::Instance()->GetDefaultStorage(); - AliCDBManager::Instance()->GetStorage(fgkLocalUri)->Put(fStatusEntry); - AliCDBManager::Instance()->SetDefaultStorage(origStorage); + UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalUri)->Put(fStatusEntry); + + if (!result) + { + AliError(Form("WriteShuttleStatus for %s, run %d failed", fCurrentDetector.Data(), fCurrentRun)); + return kFALSE; + } + + return kTRUE; } //______________________________________________________________________________________________ @@ -304,15 +308,14 @@ void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t return; } - Log("SHUTTLE", Form("%s: Changing state from %s to %s", fCurrentDetector.Data(), status->GetStatusName(), status->GetStatusName(newStatus))); + Log("SHUTTLE", Form("%s: Changing state from %s to %s", fCurrentDetector.Data(), + status->GetStatusName(), status->GetStatusName(newStatus))); status->SetStatus(newStatus); if (increaseCount) status->IncreaseCount(); - AliCDBStorage *origStorage = AliCDBManager::Instance()->GetDefaultStorage(); AliCDBManager::Instance()->GetStorage(fgkLocalUri)->Put(fStatusEntry); - AliCDBManager::Instance()->SetDefaultStorage(origStorage); } //______________________________________________________________________________________________ @@ -329,9 +332,7 @@ Bool_t AliShuttle::ContinueProcessing() Log("SHUTTLE", Form("%s: Processing first time.", fCurrentDetector.Data())); status = new AliShuttleStatus(AliShuttleStatus::kStarted); - WriteShuttleStatus(status); - - return kTRUE; + return WriteShuttleStatus(status); } if (status->GetStatus() == AliShuttleStatus::kDone) @@ -349,15 +350,17 @@ Bool_t AliShuttle::ContinueProcessing() // if we get here, there is a restart // abort conditions - if (status->GetStatus() == AliShuttleStatus::kPPStarted && status->GetCount() > fConfig->GetMaxPPRetries() || - status->GetCount() > fConfig->GetMaxRetries()) + if (status->GetStatus() == AliShuttleStatus::kPPStarted && status->GetCount() >= fConfig->GetMaxPPRetries() || + status->GetCount() >= fConfig->GetMaxRetries()) { - Log("SHUTTLE", Form("%s, run %d failed to often, %d times, status %s. Skipping processing.", fCurrentDetector.Data(), fCurrentRun, status->GetCount(), status->GetStatusName())); + Log("SHUTTLE", Form("%s, run %d failed too often, %d times, status %s. Skipping processing.", + fCurrentDetector.Data(), fCurrentRun, status->GetCount(), status->GetStatusName())); return kFALSE; } - Log("SHUTTLE", Form("Restart of %s, run %d. Got stuck before in %s, count %d", fCurrentDetector.Data(), fCurrentRun, status->GetStatusName(), status->GetCount())); + Log("SHUTTLE", Form("Restart of %s, run %d. Got stuck before in %s, count %d", + fCurrentDetector.Data(), fCurrentRun, status->GetStatusName(), status->GetCount())); UpdateShuttleStatus(AliShuttleStatus::kStarted, kTRUE); @@ -381,7 +384,7 @@ Bool_t AliShuttle::Process(Int_t run, UInt_t startTime, UInt_t endTime) Bool_t hasError = kFALSE; for(Int_t iSys=0;iSys<3;iSys++) fFESCalled[iSys]=kFALSE; - fCurrentRun = run; + fCurrentRun = run; fCurrentStartTime = startTime; fCurrentEndTime = endTime; @@ -390,13 +393,12 @@ Bool_t AliShuttle::Process(Int_t run, UInt_t startTime, UInt_t endTime) TObjString* aDetector; while ((aDetector = (TObjString*) iter.Next())) { - fCurrentDetector = aDetector->String(); + fCurrentDetector = aDetector->String(); Bool_t detectorError=kFALSE; if (!fConfig->HostProcessDetector(fCurrentDetector)) continue; - if (ContinueProcessing() == kFALSE) - continue; + if (ContinueProcessing() == kFALSE) continue; if(!Process()) { hasError = kTRUE; @@ -419,8 +421,8 @@ Bool_t AliShuttle::Process(Int_t run, UInt_t startTime, UInt_t endTime) // fFESlist[kHLT].Clear(); //} - UpdateShuttleStatus(AliShuttleStatus::kDone); - } + UpdateShuttleStatus(AliShuttleStatus::kDone); + } fCurrentRun = -1; fCurrentStartTime = 0; @@ -446,13 +448,13 @@ Bool_t AliShuttle::Process() if (!fConfig->HasDetector(fCurrentDetector)) { Log(fCurrentDetector, "There isn't any configuration for %s !"); - UpdateShuttleStatus(AliShuttleStatus::kFailed); + UpdateShuttleStatus(AliShuttleStatus::kFailed); return kFALSE; } - UpdateShuttleStatus(AliShuttleStatus::kDCSStarted); + UpdateShuttleStatus(AliShuttleStatus::kDCSStarted); - TString host(fConfig->GetDCSHost(fCurrentDetector)); + TString host(fConfig->GetDCSHost(fCurrentDetector)); Int_t port = fConfig->GetDCSPort(fCurrentDetector); TIter iter(fConfig->GetDCSAliases(fCurrentDetector)); @@ -477,13 +479,11 @@ Bool_t AliShuttle::Process() } } - // even if hasError is TRUE the Shuttle should keep on processing the detector (calib files!) - if (hasError) - { - UpdateShuttleStatus(AliShuttleStatus::kDCSError); - return kFALSE; - } + { + UpdateShuttleStatus(AliShuttleStatus::kDCSError); + return kFALSE; + } UpdateShuttleStatus(AliShuttleStatus::kPPStarted); @@ -936,18 +936,23 @@ void AliShuttle::Log(const char* detector, const char* message) { // Fill log string with a message - TString toLog = Form("%s, run %d - %s", detector, GetCurrentRun(), message); - AliInfo(toLog.Data()); + TString toLog = Form("%s: %s, run %d - %s", TTimeStamp(time(0)).AsString(), + detector, GetCurrentRun(), message); + AliInfo(toLog.Data()); TString fileName; fileName.Form("%s/%s.log", fgkShuttleLogDir, detector); gSystem->ExpandPathName(fileName); - AliInfo(fileName.Data()); - ofstream logFile; logFile.open(fileName, ofstream::out | ofstream::app); + if (!logFile.is_open()) + { + AliError(Form("Could not open file %s", fileName.Data())); + return; + } + logFile << toLog.Data() << "\n"; logFile.close(); diff --git a/SHUTTLE/AliShuttle.h b/SHUTTLE/AliShuttle.h index 334567c385e..2d972c08f81 100644 --- a/SHUTTLE/AliShuttle.h +++ b/SHUTTLE/AliShuttle.h @@ -81,7 +81,7 @@ private: TList* GetHLTFileSources(const char* detector, const char* id); AliShuttleStatus* ReadShuttleStatus(); - void WriteShuttleStatus(AliShuttleStatus* status); + Bool_t WriteShuttleStatus(AliShuttleStatus* status); Bool_t ContinueProcessing(); void UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount = kFALSE); diff --git a/SHUTTLE/AliShuttleTrigger.cxx b/SHUTTLE/AliShuttleTrigger.cxx index a21cf1723b4..b0dae21fa30 100644 --- a/SHUTTLE/AliShuttleTrigger.cxx +++ b/SHUTTLE/AliShuttleTrigger.cxx @@ -15,6 +15,12 @@ /* $Log$ + Revision 1.7 2006/07/20 09:54:40 jgrosseo + introducing status management: The processing per subdetector is divided into several steps, + after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle + can keep track of the number of failures and skips further processing after a certain threshold is + exceeded. These thresholds can be configured in LDAP. + Revision 1.6 2006/07/19 10:09:55 jgrosseo new configuration, accesst to DAQ FES (Alberto) @@ -107,7 +113,7 @@ ClassImp(AliShuttleTrigger) AliShuttleTrigger::AliShuttleTrigger(const AliShuttleConfig* config, UInt_t timeout, Int_t retries): fConfig(config), fShuttle(NULL), - fNotified(kFALSE), fTerminate(kFALSE), fCondition(&fMutex), + fNotified(kFALSE), fTerminate(kFALSE), fLastRun(0), fCondition(&fMutex), fQuitSignalHandler(this, kSigQuit), fInterruptSignalHandler(this, kSigInterrupt) { @@ -304,29 +310,95 @@ Bool_t AliShuttleTrigger::RetrieveDATEEntries(const char* whereClause, } //______________________________________________________________________________________________ -Bool_t AliShuttleTrigger::RetrieveConditionsData(const TObjArray& dateEntries, Int_t &lastRun) +Bool_t AliShuttleTrigger::RetrieveConditionsData(const TObjArray& dateEntries, Bool_t updateLastRun) { -// Retrieve conditions data for all runs that aren't processed yet - - Bool_t hasError = kFALSE; - - TIter iter(&dateEntries); - AliShuttleTriggerDATEEntry* anEntry; - lastRun=-1; - while ((anEntry = (AliShuttleTriggerDATEEntry*) iter.Next())) { - Bool_t processError = kFALSE; - if(lastRun == -1) lastRun = anEntry->GetRun(); - if(!fShuttle->Process(anEntry->GetRun(), - anEntry->GetStartTime(), - anEntry->GetEndTime())) { - processError = kTRUE; - hasError = kTRUE; - } - // Only the last SUCCESSFUL run must be stored! - if(!hasError && !processError) lastRun = anEntry->GetRun(); - } + // Retrieve conditions data for all runs that aren't processed yet + + Bool_t hasError = kFALSE; + + TIter iter(&dateEntries); + AliShuttleTriggerDATEEntry* anEntry; + + while ((anEntry = (AliShuttleTriggerDATEEntry*) iter.Next())) + { + if (!fShuttle->Process(anEntry->GetRun(), + anEntry->GetStartTime(), + anEntry->GetEndTime())) + { + hasError = kTRUE; + } + + if (!hasError && updateLastRun && fLastRun < anEntry->GetRun()) + { + fLastRun = anEntry->GetRun(); + WriteLastRun(); + } + } + + return hasError == kFALSE; +} + +//______________________________________________________________________________________________ +Bool_t AliShuttleTrigger::ReadLastRun() +{ + // reads the last processed run from local CDB + + AliCDBEntry* cdbEntry = AliCDBManager::Instance()->GetStorage(AliShuttle::GetLocalURI()) + ->Get("/SHUTTLE/SYSTEM/LASTRUN", 0); + + if (cdbEntry) + { + TObject* anObject = cdbEntry->GetObject(); + if (anObject == NULL || anObject->IsA() != AliDCSValue::Class()) + { + AliError("Invalid last run object stored to CDB!"); + return kFALSE; + } + AliDCSValue* dcsValue = (AliDCSValue*) anObject; + fLastRun = dcsValue->GetInt(); + + delete cdbEntry; + } + else + { + AliFatal("No last run number stored. Please set first. Aborting"); + return kFALSE; + } + + AliInfo(Form("Last run number <%d>", fLastRun)); + + return kTRUE; +} - return hasError == kFALSE; +//______________________________________________________________________________________________ +Bool_t AliShuttleTrigger::WriteLastRun() +{ + // writes the last succesfully processed run to local CDB + + AliDCSValue lastRunObj(fLastRun, 0); + AliCDBMetaData metaData; + AliCDBId cdbID(AliCDBPath("SHUTTLE", "SYSTEM", "LASTRUN"), 0, 0); + + UInt_t result = AliCDBManager::Instance()->GetStorage(AliShuttle::GetLocalURI()) + ->Put(&lastRunObj, cdbID, &metaData); + + if (!result) { + AliError("Can't store last run to CDB!"); + return kFALSE; + } + + return kTRUE; +} + +//______________________________________________________________________________________________ +Bool_t AliShuttleTrigger::SetNewLastRun(Int_t run) +{ + // sets a new run manually, use with caution! + + fShuttle->Log("SHUTTLE", Form("Setting last run manually to %d", run)); + + fLastRun = run; + return WriteLastRun(); } //______________________________________________________________________________________________ @@ -358,8 +430,7 @@ Bool_t AliShuttleTrigger::Collect(Int_t run) return kFALSE; } - Int_t lastRun; - if (!RetrieveConditionsData(dateEntries, lastRun)) { + if (!RetrieveConditionsData(dateEntries, kFALSE)) { AliError("An error occured during conditions data retrieval!"); return kFALSE; } @@ -368,7 +439,7 @@ Bool_t AliShuttleTrigger::Collect(Int_t run) } //______________________________________________________________________________________________ -Bool_t AliShuttleTrigger::CollectNew() +Bool_t AliShuttleTrigger::CollectNew() { // // Collects conditions data for all new run written to DAQ LogBook. @@ -376,78 +447,51 @@ Bool_t AliShuttleTrigger::CollectNew() AliInfo("Collecting conditions data for new runs ..."); - Int_t lastRun; - - AliCDBEntry* cdbEntry = AliCDBManager::Instance()->GetStorage(AliShuttle::GetLocalURI()) - ->Get("/SHUTTLE/SYSTEM/LASTRUN", 0); - if (cdbEntry) { - TObject* anObject = cdbEntry->GetObject(); - if (anObject == NULL || - anObject->IsA() != AliDCSValue::Class()) { - AliError("Invalid last run object stored to CDB!"); - return kFALSE; - } - AliDCSValue* simpleValue = (AliDCSValue*) anObject; - lastRun = simpleValue->GetInt(); - AliInfo(Form("Last run successfully stored: %d",lastRun)); - delete cdbEntry; - } else { - AliWarning("There isn't last run stored! Starting from run 21240"); - lastRun = 21240; // TODO maybe exit here - } - - AliInfo(Form("Last run number <%d>", lastRun)); + if (!ReadLastRun()) + { + AliError("Retrieving of last run failed"); + return kFALSE; + } TString whereClause("where run > "); - whereClause += lastRun; + whereClause += fLastRun; - Int_t newLastRun; TObjArray dateEntries; if (!RetrieveDATEEntries(whereClause, dateEntries)) { AliError("Can't retrieve entries from DAQ log book."); return kFALSE; } - if (!RetrieveConditionsData(dateEntries, newLastRun)) { + if (!RetrieveConditionsData(dateEntries, kTRUE)) { AliError("Process of at least one run failed!"); // return kFALSE; } - - if (newLastRun > lastRun) { - AliDCSValue lastRunObj(newLastRun, 0); - AliCDBMetaData metaData; - AliCDBId cdbID(AliCDBPath("SHUTTLE", "SYSTEM", "LASTRUN"), 0, 0); - - UInt_t result = AliCDBManager::Instance()->GetStorage(AliShuttle::GetLocalURI()) - ->Put(&lastRunObj, cdbID, &metaData); - if (!result) { - AliError("Can't store last run to CDB!"); - return kFALSE; - } - } - - return kTRUE; } //______________________________________________________________________________________________ -Bool_t AliShuttleTrigger::CollectAll() +Bool_t AliShuttleTrigger::CollectAll() { // // Collects conditions data for all run written in DAQ LogBook. // + if (!ReadLastRun()) + { + AliError("Retrieving of last run failed"); + return kFALSE; + } + AliInfo("Collecting conditions data for all runs ..."); - Int_t lastRun; TObjArray dateEntries; if (!RetrieveDATEEntries("", dateEntries)) { AliError("Can't retrieve entries from DAQ log book."); return kFALSE; } - if (!RetrieveConditionsData(dateEntries, lastRun)) { + if (!RetrieveConditionsData(dateEntries, kTRUE)) { AliError("An error occured during conditions data retrieval!"); return kFALSE; } diff --git a/SHUTTLE/AliShuttleTrigger.h b/SHUTTLE/AliShuttleTrigger.h index 804a0e2abb0..8335066a2f9 100644 --- a/SHUTTLE/AliShuttleTrigger.h +++ b/SHUTTLE/AliShuttleTrigger.h @@ -55,7 +55,9 @@ public: Bool_t Collect(Int_t run); Bool_t CollectNew(); Bool_t CollectAll(); - + + Bool_t SetNewLastRun(Int_t run); + virtual Bool_t Notify(); void Terminate(); @@ -83,7 +85,10 @@ private: }; Bool_t RetrieveDATEEntries(const char* whereClause, TObjArray& entries); - Bool_t RetrieveConditionsData(const TObjArray& dateEntries, Int_t& lastRun); + Bool_t RetrieveConditionsData(const TObjArray& dateEntries, Bool_t updateLastRun); + + Bool_t ReadLastRun(); + Bool_t WriteLastRun(); const AliShuttleConfig* fConfig; //AliCDBStorage* fLocalStorage; @@ -93,6 +98,8 @@ private: Bool_t fNotified; // Notified flag Bool_t fTerminate; // Terminate flag + Int_t fLastRun; // last sucessfully processed run + TMutex fMutex; // Mutex TCondition fCondition; // Condition diff --git a/SHUTTLE/Shuttle.C b/SHUTTLE/Shuttle.C index fcf848753d4..a2467c37f6c 100644 --- a/SHUTTLE/Shuttle.C +++ b/SHUTTLE/Shuttle.C @@ -35,6 +35,10 @@ void Shuttle(const char* param = "listen") { trigger.CollectAll(); } else if (paramStr == "listen") { trigger.Run(); + } else if (paramStr.BeginsWith("lastrun=")) { + Int_t run = TString(paramStr(8, paramStr.Length()-7).Data()).Atoi(); + cout << run << endl; + trigger.SetNewLastRun(run); } else { cout<<"Bad parameter: "< - sets last run manually. use with caution!" << endl cout<<" - the same as 'listen'"<