X-Git-Url: http://git.uio.no/git/?a=blobdiff_plain;f=SHUTTLE%2FAliShuttle.cxx;h=75d96728b96b0db9b21440ca0c54ac32fb5b43ef;hb=6a62da73fb1ac261f88e15bf2ed2f5172012ad8b;hp=a7f736ce6dfe3d075495358cb7ec41ef93c2ce24;hpb=7802e8849a2b6ec69f2449ca7bd2f135172a105b;p=u%2Fmrichter%2FAliRoot.git diff --git a/SHUTTLE/AliShuttle.cxx b/SHUTTLE/AliShuttle.cxx index a7f736ce6df..75d96728b96 100644 --- a/SHUTTLE/AliShuttle.cxx +++ b/SHUTTLE/AliShuttle.cxx @@ -28,6 +28,7 @@ // For detSpec is used the alias name. // +#include #include "AliShuttle.h" #include "AliCDBManager.h" @@ -67,6 +68,7 @@ #include #include +using namespace std; ClassImp(AliShuttle) @@ -96,9 +98,9 @@ fOutputRedirected(kFALSE) // if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********"); - for(int iSys=0;iSys<4;iSys++) { + for(int iSys=0;iSys<5;iSys++) { fServer[iSys]=0; - if (iSys < 3) + if (iSys < 4) fFXSlist[iSys].SetOwner(kTRUE); } fPreprocessorMap.SetOwner(kTRUE); @@ -117,7 +119,7 @@ AliShuttle::~AliShuttle() // fPreprocessorMap.DeleteAll(); - for(int iSys=0;iSys<4;iSys++) + for(int iSys=0;iSys<5;iSys++) if(fServer[iSys]) { fServer[iSys]->Close(); delete fServer[iSys]; @@ -200,6 +202,7 @@ Bool_t AliShuttle::StoreLocally(const TString& localUri, // // returns 0 if fail, 1 otherwise + if (fTestMode & kErrorStorage) { Log(fCurrentDetector, "StoreLocally - In TESTMODE - Simulating error while storing locally"); @@ -234,8 +237,11 @@ Bool_t AliShuttle::StoreLocally(const TString& localUri, if (!(AliCDBManager::Instance()->GetStorage(localUri))) { Log("SHUTTLE", Form("StoreLocally - Cannot activate local %s storage", cdbType)); } else { + Int_t logLevel = AliLog::GetGlobalLogLevel(); + AliLog::SetGlobalLogLevel(AliLog::kError); result = AliCDBManager::Instance()->GetStorage(localUri) ->Put(object, id, metaData); + AliLog::SetGlobalLogLevel((AliLog::EType_t)logLevel); } if(!result) { @@ -243,6 +249,7 @@ Bool_t AliShuttle::StoreLocally(const TString& localUri, Log(fCurrentDetector, Form("StoreLocally - Can't store object <%s>!", id.ToString().Data())); } + return result; } @@ -352,6 +359,7 @@ Int_t AliShuttle::StoreOCDB(const TString& gridURI) Form("StoreOCDB - cannot activate main %s storage", type)); return -2; } + gridSto->SetMirrorSEs(fgkMirrorSEs.Data()); gridIds = gridSto->GetQueryCDBList(); @@ -484,7 +492,7 @@ Bool_t AliShuttle::CleanReferenceStorage(const char* detector) if (!dirList) return kTRUE; - if (dirList->GetEntries() < 3) + if (dirList->GetEntries() < 3) // to be changed to 4? { delete dirList; return kTRUE; @@ -865,7 +873,7 @@ const char* AliShuttle::GetRefFilePrefix(const char* base, const char* detector) // TString offDetStr(GetOfflineDetName(detector)); - TString dir; + static TString dir; if (offDetStr == "ITS" || offDetStr == "MUON" || offDetStr == "PHOS") { dir.Form("%s/%s/%s", base, offDetStr.Data(), detector); @@ -874,8 +882,6 @@ const char* AliShuttle::GetRefFilePrefix(const char* base, const char* detector) } return dir.Data(); - - } //______________________________________________________________________________________________ @@ -947,8 +953,12 @@ AliShuttleStatus* AliShuttle::ReadShuttleStatus() } Int_t path1 = GetCurrentRun()/10000; - fStatusEntry = AliCDBManager::Instance()->GetStorage(GetLocalCDB()) - ->Get(Form("/SHUTTLE/%s/%d", fCurrentDetector.Data(), path1), GetCurrentRun()); + try{ + fStatusEntry = AliCDBManager::Instance()->GetStorage(GetLocalCDB()) + ->Get(Form("/SHUTTLE/%s/%d", fCurrentDetector.Data(), path1), GetCurrentRun()); + } catch(std::exception& x) { + AliInfo(TString::Format("%s",x.what())); + } if (!fStatusEntry) return 0; fStatusEntry->SetOwner(1); @@ -983,9 +993,14 @@ Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status) fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData); fStatusEntry->SetOwner(1); + Int_t logLevel = AliLog::GetGlobalLogLevel(); + AliLog::SetGlobalLogLevel(AliLog::kError); + UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry); - if (!result) { + AliLog::SetGlobalLogLevel((AliLog::EType_t)logLevel); + + if (!result) { Log("SHUTTLE", Form("WriteShuttleStatus - Failed for %s, run %d", fCurrentDetector.Data(), run)); return kFALSE; @@ -1025,8 +1040,13 @@ void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t status->SetStatus(newStatus); if (increaseCount) status->IncreaseCount(); + Int_t logLevel = AliLog::GetGlobalLogLevel(); + AliLog::SetGlobalLogLevel(AliLog::kError); + AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry); + AliLog::SetGlobalLogLevel((AliLog::EType_t)logLevel); + SendMLDetInfo(); } @@ -1070,8 +1090,8 @@ Bool_t AliShuttle::ContinueProcessing() dynamic_cast (fPreprocessorMap.GetValue(fCurrentDetector)); if (!aPreprocessor) { - Log("SHUTTLE", Form("ContinueProcessing - %s: no preprocessor registered", fCurrentDetector.Data())); - return kFALSE; + Log("SHUTTLE", Form("ContinueProcessing - %s: no preprocessor registered", fCurrentDetector.Data())); + return kFALSE; } AliShuttleLogbookEntry::Status entryStatus = @@ -1093,7 +1113,7 @@ Bool_t AliShuttle::ContinueProcessing() if (fTestMode == kNone) { Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering" - " but this is not the first unprocessed run!")); + " but this is not the first unprocessed run!",fCurrentDetector.Data())); return kFALSE; } else @@ -1101,7 +1121,7 @@ Bool_t AliShuttle::ContinueProcessing() Log("SHUTTLE", Form("ContinueProcessing - In TESTMODE - " "Although %s requires strict run ordering " "and this is not the first unprocessed run, " - "the SHUTTLE continues")); + "the SHUTTLE continues",fCurrentDetector.Data())); } } @@ -1168,6 +1188,29 @@ Bool_t AliShuttle::ContinueProcessing() // Send mail to detector expert! Log("SHUTTLE", Form("ContinueProcessing - Sending mail to %s expert...", fCurrentDetector.Data())); + // det experts in to + TString to=""; + TIter *iterExperts = 0; + iterExperts = new TIter(fConfig->GetResponsibles(fCurrentDetector)); + TObjString *anExpert=0; + while ((anExpert = (TObjString*) iterExperts->Next())) + { + to += Form("%s, \n", anExpert->GetName()); + } + delete iterExperts; + + if (to.Length() > 0) + to.Remove(to.Length()-3); + AliDebug(2, Form("to: %s",to.Data())); + + if (to.IsNull()) { + Log("SHUTTLE", Form("List of %s responsibles not set!", fCurrentDetector.Data())); + return kFALSE; + } + + Log(fCurrentDetector.Data(), Form("ContinueProcessing - Sending mail to %s expert(s):", + fCurrentDetector.Data())); + Log(fCurrentDetector.Data(), Form("\n%s", to.Data())); if (!SendMail(kPPEMail)) Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert", fCurrentDetector.Data())); @@ -1179,7 +1222,8 @@ Bool_t AliShuttle::ContinueProcessing() Bool_t increaseCount = kTRUE; if (status->GetStatus() == AliShuttleStatus::kDCSError || status->GetStatus() == AliShuttleStatus::kDCSStarted || - status->GetStatus() == AliShuttleStatus::kFXSError) + status->GetStatus() == AliShuttleStatus::kFXSError || + status->GetStatus() == AliShuttleStatus::kOCDBError) increaseCount = kFALSE; UpdateShuttleStatus(AliShuttleStatus::kStarted, increaseCount); @@ -1203,6 +1247,9 @@ void AliShuttle::SendMLRunInfo(const char* status) runType += fLogbookEntry->GetRunParameter("log"); runType += ")"; } + if (fLogbookEntry->GetDATestMode()){ + runType += " (DATest)"; + } TMonaLisaText mlRunType("SHUTTLE_runtype", runType); TList mlList; @@ -1319,21 +1366,27 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry) // Initialization Bool_t hasError = kFALSE; - // Set the CDB and Reference folders according to the year and LHC period - TString lhcPeriod(GetLHCPeriod()); - if (lhcPeriod.Length() == 0) - { - Log("SHUTTLE","Process - LHCPeriod not found in logbook!"); - return 0; - } - - // build cdb paths (repeat each time, LHCperiod might have changed) - fgkMainCDB.Form("alien://folder=%s%d/%s/OCDB?user=alidaq?cacheFold=/tmp/OCDBCache", - fConfig->GetAlienPath(), GetCurrentYear(), lhcPeriod.Data()); - - fgkMainRefStorage.Form("alien://folder=%s%d/%s/Reference?user=alidaq?cacheFold=/tmp/OCDBCache", - fConfig->GetAlienPath(), GetCurrentYear(), lhcPeriod.Data()); - + // Set the CDB and Reference folders according to the year + + // build cdb paths (repeat each time, run might be a DATest run) + if (!fLogbookEntry->GetDATestMode()){ + fgkMainCDB.Form("alien://folder=%s%d/OCDB?user=alidaq?cacheFold=/tmp/OCDBCache", + fConfig->GetAlienPath(), GetCurrentYear()); + + fgkMainRefStorage.Form("alien://folder=%s%d/Reference?user=alidaq?cacheFold=/tmp/OCDBCache", + fConfig->GetAlienPath(), GetCurrentYear()); + } + else { + fgkMainCDB.Form("alien://folder=%s%d/DATest/OCDB?user=alidaq?cacheFold=/tmp/OCDBCache", + fConfig->GetAlienPath(), GetCurrentYear()); + + fgkMainRefStorage.Form("alien://folder=%s%d/DATest/Reference?user=alidaq?cacheFold=/tmp/OCDBCache", + fConfig->GetAlienPath(), GetCurrentYear()); + } + + AliDebug(2,Form("Main CDB storage = %s",fgkMainCDB.Data())); + AliDebug(2,Form("Main Reference storage = %s",fgkMainRefStorage.Data())); + // Loop on detectors in the configuration TIter iter(fConfig->GetDetectors()); TObjString* aDetector = 0; @@ -1360,7 +1413,7 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry) Log("SHUTTLE", Form("\t\t\t****** run %d - %s: START ******", GetCurrentRun(), aDetector->GetName())); - for(Int_t iSys=0;iSys<3;iSys++) fFXSCalled[iSys]=kFALSE; + for(Int_t iSys=0;iSys<4;iSys++) fFXSCalled[iSys]=kFALSE; Int_t initialMem = GetMem(getpid()); Log("SHUTTLE", Form("Memory consumption before forking is %d", initialMem)); @@ -1386,17 +1439,29 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry) { Long_t expiredTime = time(0) - begin; - if (expiredTime > fConfig->GetPPTimeOut()) + // the run-dependent timeout is the timeout from the configuration plus a twentieth of + // the run duration, e.g. 3 additional minutes for 1h run or 1/2h for a 10h run + Int_t runDepTimeOut = fConfig->GetPPTimeOut() + (GetCurrentEndTime() - GetCurrentStartTime()) * 0.05; + if (expiredTime > runDepTimeOut) { TString logMsg; AliShuttleStatus *currentStatus = ReadShuttleStatus(); AliShuttleStatus::Status newStatus = AliShuttleStatus::kInvalid; - if (currentStatus->GetStatus() <= AliShuttleStatus::kPPDone) + if (currentStatus->GetStatus() == AliShuttleStatus::kDCSStarted) + { + // in case the pp goes in TimeOut while retrieving the DCS DPs + // set status to kDCSError + + logMsg.Form("Process - Process of %s timed out while retrieving the DCS DataPoints. Run time: %ld seconds. Killing... and setting status to DCSError.", + fCurrentDetector.Data(), expiredTime); + newStatus = AliShuttleStatus::kDCSError; + } + else if (currentStatus->GetStatus() <= AliShuttleStatus::kPPDone) { // in case pp not yet done set status to kPPTimeOut - logMsg.Form("Process - Process of %s timed out. Run time: %d seconds. Killing...", + logMsg.Form("Process - Process of %s timed out. Run time: %ld seconds. Killing...", fCurrentDetector.Data(), expiredTime); newStatus = AliShuttleStatus::kPPTimeOut; } @@ -1405,7 +1470,7 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry) // in case the pp goes in TimeOut while storing the objects in the OCDB // set status to kStoreError - logMsg.Form("Process - Process of %s timed out while storing the OCDB object. Run time: %d seconds. Killing... and setting status to StoreError.", + logMsg.Form("Process - Process of %s timed out while storing the OCDB object. Run time: %ld seconds. Killing... and setting status to StoreError.", fCurrentDetector.Data(), expiredTime); newStatus = AliShuttleStatus::kStoreError; } @@ -1413,7 +1478,7 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry) { // in other cases don't change the status - logMsg.Form("Process - Process of %s timed out in status = %s. Run time: %d seconds. Killing... without changing the status", + logMsg.Form("Process - Process of %s timed out in status = %s. Run time: %ld seconds. Killing... without changing the status", fCurrentDetector.Data(), currentStatus->GetStatusName(), expiredTime); } @@ -1444,7 +1509,7 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry) if (expiredTime % 60 == 0) { Log("SHUTTLE", Form("Process - %s: Checking process. " - "Run time: %d seconds - Memory consumption: %d KB", + "Run time: %ld seconds - Memory consumption: %d KB", fCurrentDetector.Data(), expiredTime, mem)); SendAlive(); } @@ -1500,6 +1565,9 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry) Log("SHUTTLE", "Process - Could not redirect stderr"); } + + Log("SHUTTLE", "Executing TGrid::Connect"); + TGrid::Connect("alien://"); TString wd = gSystem->WorkingDirectory(); Int_t dir_lev1 = GetCurrentRun()/10000; @@ -1553,11 +1621,11 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry) else if (success == 0) { Log("SHUTTLE", - Form("\t\t\t****** run %d - %s: PP ERROR ******", + Form("\t\t\t****** run %d - %s: ERROR ******", GetCurrentRun(), aDetector->GetName())); } - for (UInt_t iSys=0; iSys<3; iSys++) + for (UInt_t iSys=0; iSys<4; iSys++) { if (fFXSCalled[iSys]) fFXSlist[iSys].Clear(); } @@ -1651,6 +1719,15 @@ Int_t AliShuttle::ProcessCurrentDetector() return 2; } + // checking if OCDB is reachable + AliCDBEntry* testEntry = GetFromOCDB("SHUTTLE","GRP/CTP/DummyConfig"); + if (!testEntry){ + // OCDB is not accessible, going in OCDBError for current detector + AliError("OCDB Test entry not accessible"); + UpdateShuttleStatus(AliShuttleStatus::kOCDBError); + return 0; + } + TMap* dcsMap = new TMap(); aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime()); @@ -1815,13 +1892,13 @@ void AliShuttle::CountOpenRuns() SendAlive(); // check connection, in case connect - if (!Connect(3)) + if (!Connect(4)) return; TString sqlQuery; sqlQuery = Form("select count(*) from %s where shuttle_done=0", fConfig->GetShuttlelbTable()); - TSQLResult* aResult = fServer[3]->Query(sqlQuery); + TSQLResult* aResult = fServer[4]->Query(sqlQuery); if (!aResult) { AliError(Form("Can't execute query <%s>!", sqlQuery.Data())); return; @@ -1872,12 +1949,12 @@ Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause, entries.SetOwner(1); // check connection, in case connect - if (!Connect(3)) return kFALSE; + if (!Connect(4)) return kFALSE; TString sqlQuery; sqlQuery = Form("select * from %s %s order by run", fConfig->GetShuttlelbTable(), whereClause); - TSQLResult* aResult = fServer[3]->Query(sqlQuery); + TSQLResult* aResult = fServer[4]->Query(sqlQuery); if (!aResult) { AliError(Form("Can't execute query <%s>!", sqlQuery.Data())); return kFALSE; @@ -1892,7 +1969,7 @@ Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause, } // TODO Check field count! - const UInt_t nCols = 23; + const UInt_t nCols = 27; if (aResult->GetFieldCount() != (Int_t) nCols) { Log("SHUTTLE", "Invalid SQL result field number!"); delete aResult; @@ -1908,6 +1985,11 @@ Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause, if (!entry) continue; + // DA test mode flag + TString daTestModeString(aRow->GetField(2), aRow->GetFieldLength(2)); // field 2 = DA test mode flag + Bool_t daTestMode = (Bool_t)daTestModeString.Atoi(); + entry->SetDATestMode(daTestMode); + // loop on detectors for(UInt_t ii = 0; ii < nCols; ii++) entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii)); @@ -1928,13 +2010,13 @@ AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run) // // check connection, in case connect - if (!Connect(3)) + if (!Connect(4)) return 0; TString sqlQuery; sqlQuery.Form("select * from %s where run=%d", fConfig->GetDAQlbTable(), run); - TSQLResult* aResult = fServer[3]->Query(sqlQuery); + TSQLResult* aResult = fServer[4]->Query(sqlQuery); if (!aResult) { Log("SHUTTLE", Form("Can't execute query <%s>!", sqlQuery.Data())); return 0; @@ -1972,38 +2054,58 @@ AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run) UInt_t startTime = entry->GetStartTime(); UInt_t endTime = entry->GetEndTime(); Bool_t ecsSuccess = entry->GetECSSuccess(); - - TString totEventsStr = entry->GetRunParameter("totalEvents"); - Int_t totEvents = totEventsStr.Atoi(); + TString runType = entry->GetRunType(); + TString tmpdaqstartTime = entry->GetRunParameter("DAQ_time_start"); + TString recordingFlagString = entry->GetRunParameter("GDCmStreamRecording"); + UInt_t recordingFlag = recordingFlagString.Atoi(); + UInt_t daqstartTime = tmpdaqstartTime.Atoi(); UInt_t now = time(0); - // TODO make this a configuration parameter Int_t dcsDelay = fConfig->GetDCSDelay()+fConfig->GetDCSQueryOffset(); - - // runs are accepted if they have ecsSuccess set or more than 1 event - if (startTime != 0 && endTime != 0 && endTime > startTime && (totEvents > 1 || ecsSuccess) && (endTime < now - dcsDelay)) - { - if (ecsSuccess == kFALSE) - Log("SHUTTLE", Form("Processing run %d although in status ECS failure, Reason: %s", run, entry->GetRunParameter("eor_reason"))); - return entry; - } Bool_t skip = kFALSE; - if (endTime != 0 && endTime >= now - dcsDelay) - { - Log("SHUTTLE", Form("Skipping run %d for now, because DCS buffer time is not yet expired", run)); - } - else if (totEvents <= 1) - { - Log("SHUTTLE", Form("QueryRunParameters - Run %d has 1 event or less - Skipping!", run)); - skip = kTRUE; - } - else - { - Log("SHUTTLE", Form("QueryRunParameters - Invalid parameters for Run %d: " - "startTime = %d, endTime = %d. Skipping (Shuttle won't be marked as DONE)!", - run, startTime, endTime)); + // runs are processed if + // a) runType is PHYSICS and ecsSuccess is set + // b) runType is not PHYSICS and (ecsSuccess is set or DAQ_time_start is non-0) + // effectively this means that all runs are processed that started properly (ecsSucess behaviour is different for PHYSICS and non-PHYSICS runs (check with ECS!) + if (startTime != 0 && endTime != 0) { + if (endTime > startTime) { + if (endTime >= now - dcsDelay) { + Log("SHUTTLE", Form("Skipping run %d for now, because DCS buffer time is not yet expired", run)); + } else { + if ((runType == "PHYSICS" || runType == "STANDALONE") && recordingFlag == 0){ + Log("SHUTTLE", Form("QueryRunParameters - Run type for run %d is %s but the recording is OFF - Skipping!", run, runType.Data())); + skip = kTRUE; + } + else { + if (runType == "PHYSICS") { + if (ecsSuccess) { + return entry; + } else { + Log("SHUTTLE", Form("QueryRunParameters - Run type for run %d is PHYSICS but ECS success flag not set (Reason = %s) - Skipping!", run, entry->GetRunParameter("eor_reason"))); + skip = kTRUE; + } + } else { + if (ecsSuccess || daqstartTime > 0) { + if (ecsSuccess == kFALSE) + Log("SHUTTLE", Form("Processing run %d although in status ECS failure (Reason: %s), since run type != PHYSICS and DAQ_time_start != 0", run, entry->GetRunParameter("eor_reason"))); + return entry; + } else { + Log("SHUTTLE", Form("QueryRunParameters - Run type for run %d is %s, ECS success flag was not set (Reason = %s) and DAQ_time_start was NULL - Skipping!", run, runType.Data(), entry->GetRunParameter("eor_reason"))); + skip = kTRUE; + } + } + } + } + } else { + Log("SHUTTLE", Form("QueryRunParameters - Invalid parameters for run %d: startTime equal to endTime: %d %d - Skipping!", run, startTime, endTime)); + skip = kTRUE; + } + } else { + Log("SHUTTLE", Form("QueryRunParameters - Invalid parameters for Run %d: " + "startTime = %d, endTime = %d. Skipping (Shuttle won't be marked as DONE)!", + run, startTime, endTime)); } if (skip) @@ -2038,19 +2140,25 @@ TMap* AliShuttle::GetValueSet(const char* host, Int_t port, const TSeqCollection AliDCSClient client(host, port, fTimeout, fRetries, multiSplit); TMap* result = 0; + UInt_t startQuery = GetStartTimeDCSQuery(); + UInt_t endQuery = GetEndTimeDCSQuery(); + if (fCurrentDetector == "GRP" && (endQuery - startQuery) <= 120) { // enlarging DCS query for GRP when a run is shorter than 2 minutes (i.e. the time of forced archival of GRP DPs) + Log(fCurrentDetector.Data(), Form("GetValueSet: run lasting less than 120 seconds, enlarging DCS window for DPs retrival to 130 s")); + startQuery = endQuery - 130; // we add 130 s to be sure that there is something (even if the archival is forced after 120 s) + } + if (type == kAlias) { //result = client.GetAliasValues(entries, GetCurrentStartTime()-offset, // GetCurrentEndTime()+offset); - result = client.GetAliasValues(entries, GetStartTimeDCSQuery(), - GetEndTimeDCSQuery()); + + result = client.GetAliasValues(entries, startQuery, endQuery); } else if (type == kDP) { //result = client.GetDPValues(entries, GetCurrentStartTime()-offset, // GetCurrentEndTime()+offset); - result = client.GetDPValues(entries, GetStartTimeDCSQuery(), - GetEndTimeDCSQuery()); + result = client.GetDPValues(entries, startQuery, endQuery); } if (result == 0) @@ -2102,7 +2210,7 @@ const char* AliShuttle::GetFile(Int_t system, const char* detector, TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"", GetCurrentRun(), detector, id); - if (system == kDAQ) + if (system == kDAQ || system == kDQM) { whereClause += Form(" and DAQsource=\"%s\"", source); } @@ -2340,8 +2448,8 @@ TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char return NULL; } - TString sourceName = 0; - if (system == kDAQ) + TString sourceName = ""; + if (system == kDAQ || system == kDQM) { sourceName = "DAQsource"; } else if (system == kHLT) @@ -2419,7 +2527,7 @@ TList* AliShuttle::GetFileIDs(Int_t system, const char* detector, const char* so return NULL; } - TString sourceName = 0; + TString sourceName = ""; if (system == kDAQ) { sourceName = "DAQsource"; @@ -2483,11 +2591,24 @@ Bool_t AliShuttle::Connect(Int_t system) // // check connection: if already connected return - if(fServer[system] && fServer[system]->IsConnected()) return kTRUE; + + if(fServer[system] && fServer[system]->IsConnected()) { + // ping the server + if (fServer[system]->PingVerify()==kTRUE){ // connection is still alive + return kTRUE; + } + else{ + AliWarning(Form("Connection got lost to FXS database for %s. Closing and reconnecting.", + AliShuttleInterface::GetSystemName(system))); + fServer[system]->Close(); + delete fServer[system]; + fServer[system] = 0x0; + } + } TString dbHost, dbUser, dbPass, dbName; - if (system < 3) // FXS db servers + if (system < 4) // FXS db servers { dbHost = Form("mysql://%s:%d", fConfig->GetFXSdbHost(system), fConfig->GetFXSdbPort(system)); dbUser = fConfig->GetFXSdbUser(system); @@ -2502,8 +2623,8 @@ Bool_t AliShuttle::Connect(Int_t system) } fServer[system] = TSQLServer::Connect(dbHost.Data(), dbUser.Data(), dbPass.Data()); - if (!fServer[system] || !fServer[system]->IsConnected()) { - if(system < 3) + if (!fServer[system] || !fServer[system]->IsConnected()) { + if(system < 4) { AliError(Form("Can't establish connection to FXS database for %s", AliShuttleInterface::GetSystemName(system))); @@ -2526,8 +2647,11 @@ Bool_t AliShuttle::Connect(Int_t system) case kHLT: aResult = fServer[kHLT]->GetTables(dbName.Data()); break; + case kDQM: + aResult = fServer[kDQM]->GetTables(dbName.Data()); + break; default: - aResult = fServer[3]->GetTables(dbName.Data()); + aResult = fServer[4]->GetTables(dbName.Data()); break; } @@ -2544,7 +2668,7 @@ Bool_t AliShuttle::UpdateTable() Bool_t result = kTRUE; - for (UInt_t system=0; system<3; system++) + for (UInt_t system=0; system<4; system++) { if(!fFXSCalled[system]) continue; @@ -2577,7 +2701,7 @@ Bool_t AliShuttle::UpdateTable() const char* source = ((TObjString*) aFXSarray->At(1))->GetName(); TString whereClause; - if (system == kDAQ) + if (system == kDAQ || system == kDQM) { whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";", GetCurrentRun(), fCurrentDetector.Data(), fileId, source); @@ -2595,8 +2719,8 @@ Bool_t AliShuttle::UpdateTable() delete aFXSarray; - TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system), - now.GetSec(), whereClause.Data()); + TString sqlQuery = Form("update %s set time_processed=%ld %s", fConfig->GetFXSdbTable(system), + (ULong_t)now.GetSec(), whereClause.Data()); AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data())); @@ -2627,7 +2751,9 @@ Bool_t AliShuttle::UpdateTableSkippedCase(const char* detector) Bool_t result = kTRUE; - for (UInt_t system=0; system<3; system++) + TString detName(detector); + + for (UInt_t system=0; system<4; system++) { // check connection, in case connect @@ -2644,13 +2770,13 @@ Bool_t AliShuttle::UpdateTableSkippedCase(const char* detector) TIter iter(&fFXSlist[system]); TString whereClause; - if (detector == "ALL") whereClause = Form("where run=%d and time_processed IS NULL;",GetCurrentRun()); + if (detName == "ALL") whereClause = Form("where run=%d and time_processed IS NULL;",GetCurrentRun()); else whereClause = Form("where run=%d and detector=\"%s\" and time_processed IS NULL;",GetCurrentRun(), detector); - Log("SHUTTLE",Form(" whereClause = %s ",whereClause.Data())); + //Log("SHUTTLE",Form(" whereClause = %s ",whereClause.Data())); - TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system), - now.GetSec(), whereClause.Data()); + TString sqlQuery = Form("update %s set time_processed=%ld %s", fConfig->GetFXSdbTable(system), + (ULong_t)now.GetSec(), whereClause.Data()); AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data())); @@ -2679,7 +2805,7 @@ Bool_t AliShuttle::UpdateTableFailCase() Bool_t result = kTRUE; - for (UInt_t system=0; system<3; system++) + for (UInt_t system=0; system<4; system++) { // check connection, in case connect if (!Connect(system)) @@ -2698,8 +2824,8 @@ Bool_t AliShuttle::UpdateTableFailCase() GetCurrentRun(), fCurrentDetector.Data()); - TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system), - now.GetSec(), whereClause.Data()); + TString sqlQuery = Form("update %s set time_processed=%ld %s", fConfig->GetFXSdbTable(system), + (ULong_t)now.GetSec(), whereClause.Data()); AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data())); @@ -2728,7 +2854,7 @@ Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status // // check connection, in case connect - if(!Connect(3)){ + if(!Connect(4)){ Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook."); return kFALSE; } @@ -2774,7 +2900,7 @@ Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status // Query execution TSQLResult* aResult; - aResult = dynamic_cast (fServer[3]->Query(sqlQuery)); + aResult = dynamic_cast (fServer[4]->Query(sqlQuery)); if (!aResult) { Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data())); return kFALSE; @@ -2813,16 +2939,6 @@ UInt_t AliShuttle::GetCurrentEndTime() const return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0; } -//______________________________________________________________________________________________ -UInt_t AliShuttle::GetCurrentTimeCreated() const -{ - // - // get current end time from logbook entry - // - - return fLogbookEntry ? fLogbookEntry->GetTimeCreated() : 0; -} - //______________________________________________________________________________________________ UInt_t AliShuttle::GetCurrentYear() const { @@ -2852,7 +2968,7 @@ const char* AliShuttle::GetLHCPeriod() const } //______________________________________________________________________________________________ -void AliShuttle::Log(const char* detector, const char* message) +void AliShuttle::Log(const char* detector, const char* message, UInt_t level) { // // Fill log string with a message @@ -2879,7 +2995,7 @@ void AliShuttle::Log(const char* detector, const char* message) toLog += Form("run %d - ", GetCurrentRun()); toLog += Form("%s", message); - AliInfo(toLog.Data()); + AliLog::Message(level, toLog, MODULENAME(), ClassName(), FUNCTIONNAME(), __FILE__, __LINE__); // if we redirect the log output already to the file, leave here if (fOutputRedirected && strcmp(detector, "SHUTTLE") != 0) @@ -3154,7 +3270,7 @@ Bool_t AliShuttle::SendMail(EMailTarget target, Int_t system) if (target == kDCSEMail || target == kFXSEMail) { if (!fFirstProcessing) - return kTRUE; + return kTRUE; } Int_t runMode = (Int_t)fConfig->GetRunMode(); @@ -3257,10 +3373,12 @@ Bool_t AliShuttle::SendMail(EMailTarget target, Int_t system) if (system == kDAQ) sys="DAQ"; else if (system == kDCS) sys="DCS"; else if (system == kHLT) sys="HLT"; + else if (system == kDQM) sys="DQM"; else return kFALSE; body = Form("Dear %s FXS experts, \n\n",sys.Data()); body += Form("SHUTTLE couldn\'t retrieve data from the FXS for detector %s " "in run %d!!\n\n", fCurrentDetector.Data(), GetCurrentRun()); + body += Form("The contacted server was:\nDB: %s\nFXS:%s\n\n", fConfig->GetFXSdbHost(system), fConfig->GetFXSHost(system)); } else { subject = Form("%s %s Shuttle preprocessor FAILED in run %d (run type = %s)!", @@ -3289,10 +3407,10 @@ Bool_t AliShuttle::SendMail(EMailTarget target, Int_t system) body += Form("Find the %s log for the current run on \n\n" - "\thttp://pcalishuttle01.cern.ch:8880/%s/%d/%d/%s.log \n\n", + "\thttp://pcalishuttle02.cern.ch/%s/%d/%d/%s.log \n\n", fCurrentDetector.Data(), logFolder.Data(), GetCurrentRun()/10000, GetCurrentRun(), fCurrentDetector.Data()); - body += Form("The last 10 lines of %s log file are following:\n\n", fCurrentDetector.Data()); + body += Form("The last 15 lines of %s log file are following:\n\n", fCurrentDetector.Data()); AliDebug(2, Form("Body begin: %s", body.Data())); @@ -3302,7 +3420,7 @@ Bool_t AliShuttle::SendMail(EMailTarget target, Int_t system) TString logFileName = Form("%s/%d/%d/%s.log", GetShuttleLogDir(), GetCurrentRun()/10000, GetCurrentRun(), fCurrentDetector.Data()); - TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data()); + TString tailCommand = Form("tail -n 15 %s >> %s", logFileName.Data(), bodyFileName.Data()); if (gSystem->Exec(tailCommand.Data())) { mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data()); @@ -3331,7 +3449,6 @@ Bool_t AliShuttle::SendMail(EMailTarget target, Int_t system) return result == 0; } - //______________________________________________________________________________________________ const char* AliShuttle::GetRunType() { @@ -3380,12 +3497,12 @@ const char* AliShuttle::GetTriggerConfiguration() // Receives the trigger configuration from the DAQ logbook for the current run // check connection, if needed reconnect - if (!Connect(3)) + if (!Connect(4)) return 0; TString sqlQuery; sqlQuery.Form("SELECT configFile FROM logbook_trigger_config WHERE run = %d", GetCurrentRun()); - TSQLResult* result = fServer[3]->Query(sqlQuery); + TSQLResult* result = fServer[4]->Query(sqlQuery); if (!result) { Log("SHUTTLE", Form("ERROR: Can't execute query <%s>!", sqlQuery.Data())); @@ -3394,7 +3511,7 @@ const char* AliShuttle::GetTriggerConfiguration() if (result->GetRowCount() == 0) { - Log("SHUTTLE", "ERROR: Trigger configuration not found in logbook_trigger_config"); + Log("SHUTTLE", "WARNING: Trigger configuration not found in logbook_trigger_config"); delete result; return 0; } @@ -3421,6 +3538,100 @@ const char* AliShuttle::GetTriggerConfiguration() return triggerConfig; } +//______________________________________________________________________________________________ +const char* AliShuttle::GetCTPTimeParams() +{ + // Receives the CTP time parameters from the DAQ logbook for the current run + + // check connection, if needed reconnect + if (!Connect(4)) + return 0; + + TString sqlQuery; + sqlQuery.Form("SELECT alignmentFile FROM logbook_trigger_config WHERE run = %d", GetCurrentRun()); + TSQLResult* result = fServer[4]->Query(sqlQuery); + if (!result) + { + Log("SHUTTLE", Form("ERROR: Can't execute query <%s>!", sqlQuery.Data())); + return 0; + } + + if (result->GetRowCount() == 0) + { + Log("SHUTTLE", "WARNING: CTP time params not found in logbook_trigger_config"); + delete result; + return 0; + } + + TSQLRow* row = result->Next(); + if (!row) + { + Log("SHUTTLE", "ERROR: Could not receive logbook_trigger_config data"); + delete result; + return 0; + } + + // static, so that pointer remains valid when it is returned to the calling class + static TString triggerTimeParams(row->GetField(0)); + + delete row; + row = 0; + + delete result; + result = 0; + + Log("SHUTTLE", Form("Found trigger time parameters: %s", triggerTimeParams.Data())); + + return triggerTimeParams; +} + +//______________________________________________________________________________________________ +const char* AliShuttle::GetTriggerDetectorMask() +{ + // Receives the trigger detector mask from DAQ logbook + + // check connection, if needed reconnect + if (!Connect(4)) + return 0; + + TString sqlQuery; + sqlQuery.Form("SELECT BIN(BIT_OR(inputDetectorMask)) from logbook_trigger_clusters WHERE run = %d;", GetCurrentRun()); + TSQLResult* result = fServer[4]->Query(sqlQuery); + if (!result) + { + Log("SHUTTLE", Form("ERROR: Can't execute query <%s>!", sqlQuery.Data())); + return 0; + } + + if (result->GetRowCount() == 0) + { + Log("SHUTTLE", "ERROR: Trigger Detector Mask not found in logbook_trigger_clusters"); + delete result; + return 0; + } + + TSQLRow* row = result->Next(); + if (!row) + { + Log("SHUTTLE", "ERROR: Could not receive logbook_trigger_clusters data"); + delete result; + return 0; + } + + // static, so that pointer remains valid when it is returned to the calling class + static TString triggerDetectorMask(row->GetField(0)); + + delete row; + row = 0; + + delete result; + result = 0; + + Log("SHUTTLE", Form("Found Trigger Detector Mask: %s", triggerDetectorMask.Data())); + + return triggerDetectorMask; +} + //______________________________________________________________________________________________ void AliShuttle::SetShuttleTempDir(const char* tmpDir) { @@ -3455,24 +3666,16 @@ Bool_t AliShuttle::TouchFile() } TString dir; - dir.Form("%s%d/%s/SHUTTLE_DONE", fConfig->GetAlienPath(), GetCurrentYear(), GetLHCPeriod()); + dir.Form("%s%d/SHUTTLE_DONE", fConfig->GetAlienPath(), GetCurrentYear()); // checking whether directory for touch command exists - TString commandLs; - commandLs.Form("ls %s",dir.Data()); - TGridResult *resultLs = dynamic_cast(gGrid->Command(commandLs)); - if (!resultLs){ - Log("SHUTTLE",Form("No result for %s command, returning without touching",commandLs.Data())); + TGridResult* resultLs = gGrid->Ls(dir.Data()); + if (!resultLs){ // unfortunately we don't get this for ls of a non existing dir + Log("SHUTTLE",Form("No result for \"Ls(\"%s\")\", returning without touching", dir.Data())); return kFALSE; } - TMap *mapLs = dynamic_cast(resultLs->At(0)); - if (!mapLs){ - Log("SHUTTLE",Form("No map for %s command, returning without touching",commandLs.Data())); - delete resultLs; - resultLs = 0x0; - return kFALSE; - } - TObjString *valueLsPath = dynamic_cast(mapLs->GetValue("path")); - if (!valueLsPath || (TString)(valueLsPath->GetString()).CompareTo(dir)!=1){ + + if ( resultLs->GetEntries() == 1 && !resultLs->GetFileName(0) ) { + // this is what we currently get for ls of a non existing dir Log("SHUTTLE",Form("No directory %s found, creating it",dir.Data())); // creating the directory @@ -3493,50 +3696,74 @@ Bool_t AliShuttle::TouchFile() delete resultLs; resultLs = 0x0; - TString command; - command.Form("touch %s/%i", dir.Data(), GetCurrentRun()); - Log("SHUTTLE", Form("Creating entry in file catalog: %s", command.Data())); - TGridResult *resultTouch = dynamic_cast(gGrid->Command(command)); - if (!resultTouch){ - Log("SHUTTLE",Form("No result for touching command, returning without touching for run %i",GetCurrentRun())); - return kFALSE; - } - TMap *mapTouch = dynamic_cast(resultTouch->At(0)); - if (!mapTouch){ - Log("SHUTTLE",Form("No map for touching command, returning without touching for run %i",GetCurrentRun())); - delete resultTouch; - resultTouch = 0x0; - return kFALSE; - } - TObjString *valueTouch = dynamic_cast(mapTouch->GetValue("__result__")); - if (!valueTouch){ - Log("SHUTTLE",Form("No value for \"__result__\" key set in the map for touching command, returning without touching for run %i",GetCurrentRun())); - delete resultTouch; - resultTouch = 0x0; + // Before trying to touch, check that the file is not already there (the touch would fail forever, leaving the run in pending) + TString lsFileCommand; + lsFileCommand.Form("ls %s/%i", dir.Data(), GetCurrentRun()); + TGridResult *resultLsFile = dynamic_cast(gGrid->Command(lsFileCommand)); + if (!resultLsFile){ + Log("SHUTTLE",Form("No result for file ls command, returning without touching for run %i",GetCurrentRun())); return kFALSE; } - if (valueTouch->GetString()!="1"){ - Log("SHUTTLE",Form("Failing the touching command, returning without touching for run %i",GetCurrentRun())); - delete resultTouch; - resultTouch = 0x0; + TMap *mapLsFile = dynamic_cast(resultLsFile->At(0)); + if (!mapLsFile){ + Log("SHUTTLE",Form("No map for file ls command, returning without touching for run %i",GetCurrentRun())); + delete resultLsFile; + resultLsFile = 0x0; return kFALSE; } - delete resultTouch; - resultTouch = 0x0; - Log("SHUTTLE", "Sucessfully touched the file"); - return kTRUE; + TObjString *valueLsFile = dynamic_cast(mapLsFile->GetValue("name")); + if (valueLsFile){ + Log("SHUTTLE",Form("\"name\" key set in the map for file ls command. Touchfile for run %i already there.",GetCurrentRun())); + Log("SHUTTLE", "The file was already there, did not touch it."); + }else{ + TString command; + command.Form("touch %s/%i", dir.Data(), GetCurrentRun()); + Log("SHUTTLE", Form("Creating entry in file catalog: %s", command.Data())); + TGridResult *resultTouch = dynamic_cast(gGrid->Command(command)); + if (!resultTouch){ + Log("SHUTTLE",Form("No result for touching command, returning without touching for run %i",GetCurrentRun())); + return kFALSE; + } + TMap *mapTouch = dynamic_cast(resultTouch->At(0)); + if (!mapTouch){ + Log("SHUTTLE",Form("No map for touching command, returning without touching for run %i",GetCurrentRun())); + delete resultTouch; + resultTouch = 0x0; + return kFALSE; + } + TObjString *valueTouch = dynamic_cast(mapTouch->GetValue("__result__")); + if (!valueTouch){ + Log("SHUTTLE",Form("No value for \"__result__\" key set in the map for touching command, returning without touching for run %i",GetCurrentRun())); + delete resultTouch; + resultTouch = 0x0; + return kFALSE; + } + if (valueTouch->GetString()!="1"){ + Log("SHUTTLE",Form("Failing the touching command, returning without touching for run %i",GetCurrentRun())); + delete resultTouch; + resultTouch = 0x0; + return kFALSE; + } + delete resultLsFile; + resultLsFile = 0x0; + delete resultTouch; + resultTouch = 0x0; + Log("SHUTTLE", "Sucessfully touched the file"); + } + return kTRUE; } + //______________________________________________________________________________________________ -const UInt_t AliShuttle::GetStartTimeDCSQuery() +UInt_t AliShuttle::GetStartTimeDCSQuery() { // Return Start Time for the DCS query // // The call is delegated to AliShuttleInterface - return GetCurrentTimeCreated()-fConfig->GetDCSQueryOffset(); + return GetCurrentStartTime()-fConfig->GetDCSQueryOffset(); } //______________________________________________________________________________________________ -const UInt_t AliShuttle::GetEndTimeDCSQuery() +UInt_t AliShuttle::GetEndTimeDCSQuery() { // Return End Time for the DCS query // @@ -3544,5 +3771,64 @@ const UInt_t AliShuttle::GetEndTimeDCSQuery() return GetCurrentEndTime()+fConfig->GetDCSQueryOffset(); } +//______________________________________________________________________________________________ +void AliShuttle::SendMLFromDet(const char* value) +{ + // + // Sending an information coming from the current detector to ML + // + + TMonaLisaText mlText(Form("%s_RunCondition", fCurrentDetector.Data()), value); + TList mlList; + mlList.Add(&mlText); + TString mlID; + mlID.Form("%d", GetCurrentRun()); + fMonaLisa->SendParameters(&mlList, mlID); + + return; +} +//______________________________________________________________________________________________ +TString* AliShuttle::GetLTUConfig(const char* det) +{ + // + // Getting ltuFineDelay1, ltuFineDelay2, ltuBCDelay for detector det from logbook_detectors table in logbook + // + + if (!Connect(4)) + return 0; + + TString sqlQuery; + sqlQuery.Form("select LTUFineDelay1, LTUFineDelay2, LTUBCDelayAdd from logbook_detectors WHERE run_number = %d and detector = \"%s\";", GetCurrentRun(),det); + + TSQLResult* result = fServer[4]->Query(sqlQuery); + if (!result){ + Log("SHUTTLE","ERROR: No result found for the LTU configuration query"); + return 0x0; + } + if (result->GetRowCount() == 0){ + Log("SHUTTLE",Form("ERROR: LTU configuration not found in logbook_detectors for detector %s, returning null pointer",det)); + delete result; + return 0x0; + } + if (result->GetFieldCount() != 3){ + Log("SHUTTLE",Form("ERROR: not all the required fields are there for the LTU configuration for detector %s (only %d found), returning a null pointer",det, result->GetFieldCount())); + delete result; + return 0x0; + } + TSQLRow* row = result->Next(); + if (!row){ + Printf("ERROR: Could not receive logbook_detectors data, returning null pointer"); + delete result; + return 0x0; + } + TString* ltuConfigString = new TString[3]; + + ltuConfigString[0] = row->GetField(0); + ltuConfigString[1] = row->GetField(1); + ltuConfigString[2] = row->GetField(2); + + return ltuConfigString; + +}