#include <TFile.h>
#include <TGrid.h>
#include <TGridResult.h>
+#include <TMap.h>
#include <TMonaLisaWriter.h>
fLogbookEntry(0),
fCurrentDetector(),
fFirstProcessing(0),
+fFXSError(-1),
fStatusEntry(0),
fMonitoringMutex(0),
fLastActionTime(0),
// Then calls StoreRefFilesToGrid to store reference files.
//
+ UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
+
if (fTestMode & kErrorGrid)
{
Log("SHUTTLE", "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
}
Log("SHUTTLE","StoreOCDB - Storing OCDB data ...");
- Bool_t resultCDB = StoreOCDB(fgkMainCDB);
+ Int_t resultCDB = StoreOCDB(fgkMainCDB);
Log("SHUTTLE","StoreOCDB - Storing reference data ...");
- Bool_t resultRef = StoreOCDB(fgkMainRefStorage);
+ Int_t resultRef = StoreOCDB(fgkMainRefStorage);
Log("SHUTTLE","StoreOCDB - Storing reference files ...");
Bool_t resultRefFiles = CopyFilesToGrid("reference");
resultMetadata = CopyFilesToGrid("metadata");
}
- return resultCDB && resultRef && resultRefFiles && resultMetadata;
+ Int_t storeResult = 0;
+
+ if (resultCDB < 0 || resultRef < 0 || resultRefFiles == kFALSE || resultMetadata == kFALSE)
+ storeResult = -1;
+ else if (resultCDB > 0 || resultRef > 0)
+ storeResult = 1;
+
+ if (storeResult < 0)
+ {
+ Log("SHUTTLE",
+ Form("\t\t\t****** run %d - %s: STORAGE ERROR ******",
+ GetCurrentRun(), fCurrentDetector.Data()));
+ UpdateShuttleStatus(AliShuttleStatus::kStoreError);
+ }
+ else if (storeResult > 0)
+ {
+ Log("SHUTTLE",
+ Form("\t\t\t****** run %d - %s: STORAGE DELAYED ******",
+ GetCurrentRun(), fCurrentDetector.Data()));
+ UpdateShuttleStatus(AliShuttleStatus::kStoreDelayed);
+ }
+ else if (storeResult == 0)
+ {
+ Log("SHUTTLE",
+ Form("\t\t\t****** run %d - %s: DONE ******",
+ GetCurrentRun(), fCurrentDetector.Data()));
+ UpdateShuttleStatus(AliShuttleStatus::kDone);
+ UpdateShuttleLogbook(fCurrentDetector, "DONE");
+ }
+
+ return (storeResult == 0);
}
//______________________________________________________________________________________________
-Bool_t AliShuttle::StoreOCDB(const TString& gridURI)
+Int_t AliShuttle::StoreOCDB(const TString& gridURI)
{
//
// Called by StoreOCDB(), performs actual storage to the main OCDB and reference storages (Grid)
//
+ // Return code:
+ // -2 initialization error
+ // -1 storage error
+ // 0 success
+ // 1 storage delayed (e.g. previous unprocessed runs)
+ //
TObjArray* gridIds=0;
Bool_t result = kTRUE;
+ Bool_t delayed = kFALSE;
const char* type = 0;
TString localURI;
localURI = fgkLocalRefStorage;
} else {
AliError(Form("Invalid storage URI: %s", gridURI.Data()));
- return kFALSE;
+ return -2;
}
AliCDBManager* man = AliCDBManager::Instance();
if(!gridSto) {
Log("SHUTTLE",
Form("StoreOCDB - cannot activate main %s storage", type));
- return kFALSE;
+ return -2;
}
gridIds = gridSto->GetQueryCDBList();
if(!localSto) {
Log("SHUTTLE",
Form("StoreOCDB - cannot activate local %s storage", type));
- return kFALSE;
+ return -2;
}
AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*");
// Local objects were stored with current run as Grid version!
Log("SHUTTLE", Form("StoreOCDB - %s: object %s has validity infinite but "
"there are previous unprocessed runs!",
fCurrentDetector.Data(), aLocId.GetPath().Data()));
- result = kFALSE;
+ Log(fCurrentDetector.Data(), Form("StoreOCDB - %s: object %s has validity infinite but "
+ "there are previous unprocessed runs!",
+ fCurrentDetector.Data(), aLocId.GetPath().Data()));
+ delayed = kTRUE;
continue;
}
}
localEntries->Clear();
- return result;
+ Int_t returnCode = 0;
+
+ if (result == kFALSE)
+ returnCode = -1;
+ else if (delayed != kFALSE)
+ returnCode = 1;
+
+ Log("SHUTTLE", Form("StoreOCDB - Returning with %d (result = %d, delayed = %d)", returnCode, result, delayed));
+ Log(fCurrentDetector.Data(), Form("StoreOCDB - Returning with %d (result = %d, delayed = %d)", returnCode, result, delayed));
+
+ return returnCode;
}
//______________________________________________________________________________________________
AliShuttleLogbookEntry::Status entryStatus =
fLogbookEntry->GetDetectorStatus(fCurrentDetector);
- if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
+ if (entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
Log("SHUTTLE", Form("ContinueProcessing - %s is %s",
fCurrentDetector.Data(),
fLogbookEntry->GetDetectorStatusName(entryStatus)));
return WriteShuttleStatus(status);
}
- // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
+ // The following case shouldn't happen if Shuttle Logbook was correctly updated.
// If it happens it may mean Logbook updating failed... let's do it now!
if (status->GetStatus() == AliShuttleStatus::kDone ||
- status->GetStatus() == AliShuttleStatus::kFailed){
+ status->GetStatus() == AliShuttleStatus::kFailed ||
+ status->GetStatus() == AliShuttleStatus::kSkipped) {
Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
fCurrentDetector.Data(),
status->GetStatusName(status->GetStatus())));
- UpdateShuttleLogbook(fCurrentDetector.Data(),
- status->GetStatusName(status->GetStatus()));
+
+ if (status->GetStatus() == AliShuttleStatus::kSkipped)
+ {
+ UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE");
+ }
+ else
+ UpdateShuttleLogbook(fCurrentDetector.Data(), status->GetStatusName(status->GetStatus()));
+
return kFALSE;
}
- if (status->GetStatus() == AliShuttleStatus::kStoreStarted || status->GetStatus() == AliShuttleStatus::kStoreError) {
+ if (status->GetStatus() == AliShuttleStatus::kStoreStarted || status->GetStatus() == AliShuttleStatus::kStoreDelayed ||status->GetStatus() == AliShuttleStatus::kStoreError) {
Log("SHUTTLE",
Form("ContinueProcessing - %s: Grid storage of one or more "
"objects failed. Trying again now",
fCurrentDetector.Data()));
- UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
- if (StoreOCDB()){
- Log("SHUTTLE", Form("ContinueProcessing - %s: all objects "
- "successfully stored into main storage",
- fCurrentDetector.Data()));
- UpdateShuttleStatus(AliShuttleStatus::kDone);
- UpdateShuttleLogbook(fCurrentDetector, "DONE");
- } else {
- Log("SHUTTLE",
- Form("ContinueProcessing - %s: Grid storage failed again",
- fCurrentDetector.Data()));
- UpdateShuttleStatus(AliShuttleStatus::kStoreError);
- }
+ StoreOCDB();
return kFALSE;
}
// Send mail to detector expert!
Log("SHUTTLE", Form("ContinueProcessing - Sending mail to %s expert...",
fCurrentDetector.Data()));
- if (!SendMail())
+ if (!SendMail(kPPEMail))
Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert",
fCurrentDetector.Data()));
status->GetStatusName(), status->GetCount()));
Bool_t increaseCount = kTRUE;
if (status->GetStatus() == AliShuttleStatus::kDCSError ||
- status->GetStatus() == AliShuttleStatus::kDCSStarted)
+ status->GetStatus() == AliShuttleStatus::kDCSStarted ||
+ status->GetStatus() == AliShuttleStatus::kFXSError)
increaseCount = kFALSE;
UpdateShuttleStatus(AliShuttleStatus::kStarted, increaseCount);
GetCurrentRun()));
// Send the information to ML
+ CountOpenRuns();
+
TMonaLisaText mlStatus("SHUTTLE_status", "Processing");
- TMonaLisaText mlRunType("SHUTTLE_runtype", Form("%s (%s)", entry->GetRunType(), entry->GetRunParameter("log")));
+ TString runType(entry->GetRunType());
+ if (strlen(entry->GetRunParameter("log")) > 0){
+
+ runType += "(";
+ runType += entry->GetRunParameter("log");
+ runType += ")";
+ }
+ TMonaLisaText mlRunType("SHUTTLE_runtype", runType);
TList mlList;
mlList.Add(&mlStatus);
{
fCurrentDetector = aDetector->String();
- if (ContinueProcessing() == kFALSE) continue;
+ if (ContinueProcessing() == kFALSE)
+ continue;
if (first)
{
gSystem->Exit(1);
}
- Bool_t success = ProcessCurrentDetector();
+ Int_t success = ProcessCurrentDetector();
gSystem->ChangeDirectory(wd.Data());
- if (success) // Preprocessor finished successfully!
+ if (success == 1) // Preprocessor finished successfully!
{
// remove temporary folder or DCS map
if (!fConfig->KeepTempFolder())
fCurrentDetector.Data()));
// Transfer the data from local storage to main storage (Grid)
- UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
if (StoreOCDB() == kFALSE)
- {
- Log("SHUTTLE",
- Form("\t\t\t****** run %d - %s: STORAGE ERROR ******",
- GetCurrentRun(), aDetector->GetName()));
- UpdateShuttleStatus(AliShuttleStatus::kStoreError);
success = kFALSE;
- } else {
- Log("SHUTTLE",
- Form("\t\t\t****** run %d - %s: DONE ******",
- GetCurrentRun(), aDetector->GetName()));
- UpdateShuttleStatus(AliShuttleStatus::kDone);
- UpdateShuttleLogbook(fCurrentDetector, "DONE");
- }
- } else
+ }
+ else if (success == 0)
{
Log("SHUTTLE",
Form("\t\t\t****** run %d - %s: PP ERROR ******",
fFirstUnprocessed[iDet] = kFALSE;
}
}
+ TMonaLisaText mlStatusPending("SHUTTLE_status", "Pending");
+ mlList.Clear();
+ mlList.Add(&mlStatusPending);
+ fMonaLisa->SendParameters(&mlList, mlID);
}
}
}
//______________________________________________________________________________________________
-Bool_t AliShuttle::ProcessCurrentDetector()
+Int_t AliShuttle::ProcessCurrentDetector()
{
//
// Makes data retrieval just for a specific detector (fCurrentDetector).
TString wd = gSystem->WorkingDirectory();
if (!CleanReferenceStorage(fCurrentDetector.Data()))
- return kFALSE;
+ return 0;
gSystem->ChangeDirectory(wd.Data());
- TMap* dcsMap = new TMap();
-
// call preprocessor
AliPreprocessor* aPreprocessor =
dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
+ // check if the preprocessor wants to process this run type
+ if (aPreprocessor->ProcessRunType() == kFALSE)
+ {
+ UpdateShuttleStatus(AliShuttleStatus::kSkipped);
+ UpdateShuttleLogbook(fCurrentDetector, "DONE");
+ Log(fCurrentDetector, Form("ProcessCurrentDetector - %s preprocessor is not interested in this run type", fCurrentDetector.Data()));
+
+ return 2;
+ }
+
+ TMap* dcsMap = new TMap();
+
aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
Bool_t processDCS = aPreprocessor->ProcessDCS();
UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
UpdateShuttleStatus(AliShuttleStatus::kDCSError);
delete dcsMap;
- return kFALSE;
+ return 0;
} else {
UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
" Sending mail to DCS experts!", host.Data()));
UpdateShuttleStatus(AliShuttleStatus::kDCSError);
- if (!SendMailToDCS())
+ if (!SendMail(kDCSEMail))
Log("SHUTTLE", Form("ProcessCurrentDetector - "
"Could not send mail to DCS experts!"));
delete dcsMap;
- return kFALSE;
+ return 0;
}
}
" Sending mail to DCS experts!", host.Data()));
UpdateShuttleStatus(AliShuttleStatus::kDCSError);
- if (!SendMailToDCS())
+ if (!SendMail(kDCSEMail))
Log("SHUTTLE", Form("ProcessCurrentDetector - "
"Could not send mail to DCS experts!"));
if (aliasMap) delete aliasMap;
delete dcsMap;
- return kFALSE;
+ return 0;
}
}
// DCS Archive DB processing successful. Call Preprocessor!
UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
+ fFXSError = -1; // this variable is kTRUE after ::Process if an FXS error occured
+
UInt_t returnValue = aPreprocessor->Process(dcsMap);
+
+ if (fFXSError!=-1) {
+ UpdateShuttleStatus(AliShuttleStatus::kFXSError);
+ SendMail(kFXSEMail, fFXSError);
+ dcsMap->DeleteAll();
+ delete dcsMap;
+ return 0;
+ }
if (returnValue > 0) // Preprocessor error!
{
UpdateShuttleStatus(AliShuttleStatus::kPPError);
dcsMap->DeleteAll();
delete dcsMap;
- return kFALSE;
+ return 0;
}
// preprocessor ok!
dcsMap->DeleteAll();
delete dcsMap;
- return kTRUE;
+ return 1;
}
//______________________________________________________________________________________________
for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
+ delete aRow;
+ delete aResult;
+
UInt_t startTime = entry->GetStartTime();
UInt_t endTime = entry->GetEndTime();
-
-// if (!startTime || !endTime || startTime > endTime)
-// {
-// Log("SHUTTLE",
-// Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d. Skipping!",
-// run, startTime, endTime));
-//
-// Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
-// fLogbookEntry = entry;
-// if (!UpdateShuttleLogbook("shuttle_done"))
-// {
-// AliError(Form("Could not update logbook for run %d !", run));
-// }
-// fLogbookEntry = 0;
-//
-// delete entry;
-// delete aRow;
-// delete aResult;
-// return 0;
-// }
-
- if (!startTime)
- {
- Log("SHUTTLE",
- Form("QueryRunParameters - Invalid parameters for Run %d: "
- "startTime = %d, endTime = %d. Skipping!",
- run, startTime, endTime));
-
- Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
- fLogbookEntry = entry;
- if (!UpdateShuttleLogbook("shuttle_ignored"))
- {
- AliError(Form("Could not update logbook for run %d !", run));
- }
- fLogbookEntry = 0;
-
- delete entry;
- delete aRow;
- delete aResult;
- return 0;
- }
- if (startTime && !endTime)
- {
- // TODO Here we don't mark SHUTTLE done, because this may mean
- //the run is still ongoing!!
- Log("SHUTTLE",
- Form("QueryRunParameters - Invalid parameters for Run %d: "
- "startTime = %d, endTime = %d. Skipping (Shuttle won't be marked as DONE)!",
- run, startTime, endTime));
-
- //Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
- //fLogbookEntry = entry;
- //if (!UpdateShuttleLogbook("shuttle_done"))
- //{
- // AliError(Form("Could not update logbook for run %d !", run));
- //}
- //fLogbookEntry = 0;
-
- delete entry;
- delete aRow;
- delete aResult;
- return 0;
- }
-
- if (startTime && endTime && (startTime > endTime))
- {
- Log("SHUTTLE",
- Form("QueryRunParameters - Invalid parameters for Run %d: "
- "startTime = %d, endTime = %d. Skipping!",
- run, startTime, endTime));
-
- Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
- fLogbookEntry = entry;
- if (!UpdateShuttleLogbook("shuttle_ignored"))
- {
- AliError(Form("Could not update logbook for run %d !", run));
- }
- fLogbookEntry = 0;
-
- delete entry;
- delete aRow;
- delete aResult;
- return 0;
- }
-
TString totEventsStr = entry->GetRunParameter("totalEvents");
Int_t totEvents = totEventsStr.Atoi();
- if (totEvents < 1)
- {
- Log("SHUTTLE",
- Form("QueryRunParameters - Run %d has 0 events - Skipping!", run));
-
- Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
- fLogbookEntry = entry;
- if (!UpdateShuttleLogbook("shuttle_ignored"))
- {
- AliError(Form("Could not update logbook for run %d !", run));
- }
- fLogbookEntry = 0;
-
- delete entry;
- delete aRow;
- delete aResult;
- return 0;
- }
+
+ if (startTime != 0 && endTime != 0 && endTime > startTime && totEvents > 0)
+ return entry;
- delete aRow;
- delete aResult;
+ Log("SHUTTLE",
+ Form("QueryRunParameters - Invalid parameters for Run %d: "
+ "startTime = %d, endTime = %d. Skipping (Shuttle won't be marked as DONE)!",
+ run, startTime, endTime));
- return entry;
+ if (totEvents < 1)
+ Log("SHUTTLE", Form("QueryRunParameters - Run %d has 0 events - Skipping!", run));
+
+ //Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
+ //fLogbookEntry = entry;
+ //if (!UpdateShuttleLogbook("shuttle_done"))
+ //{
+ // AliError(Form("Could not update logbook for run %d !", run));
+ //}
+ //fLogbookEntry = 0;
+
+ delete entry;
+ return 0;
}
//______________________________________________________________________________________________
if (!Connect(system))
{
Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
+ fFXSError = system;
return 0;
}
else if (system == kHLT)
{
whereClause += Form(" and DDLnumbers=\"%s\"", source);
- nFields = 3;
}
TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
TSQLResult* aResult = 0;
aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
if (!aResult) {
- Log(detector, Form("GetFileName - Can't execute SQL query to %s database for: id = %s, source = %s",
+ Log(detector, Form("GetFile - Can't execute SQL query to %s database for: id = %s, source = %s",
GetSystemName(system), id, sourceName.Data()));
+ fFXSError = system;
return 0;
}
- if(aResult->GetRowCount() == 0)
+ if (aResult->GetRowCount() == 0)
{
Log(detector,
- Form("GetFileName - No entry in %s FXS db for: id = %s, source = %s",
+ Form("GetFile - No entry in %s FXS db for: id = %s, source = %s",
GetSystemName(system), id, sourceName.Data()));
delete aResult;
return 0;
if (aResult->GetRowCount() > 1) {
Log(detector,
- Form("GetFileName - More than one entry in %s FXS db for: id = %s, source = %s",
+ Form("GetFile - More than one entry in %s FXS db for: id = %s, source = %s",
GetSystemName(system), id, sourceName.Data()));
+ fFXSError = system;
delete aResult;
return 0;
}
Log(detector,
Form("GetFileName - Wrong field count in %s FXS db for: id = %s, source = %s",
GetSystemName(system), id, sourceName.Data()));
+ fFXSError = system;
delete aResult;
return 0;
}
TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
if (!aRow){
- Log(detector, Form("GetFileName - Empty set result in %s FXS db from query: id = %s, source = %s",
+ Log(detector, Form("GetFile - Empty set result in %s FXS db from query: id = %s, source = %s",
GetSystemName(system), id, sourceName.Data()));
+ fFXSError = system;
delete aResult;
return 0;
}
Bool_t result = kFALSE;
// copy!! if successful TSystem::Exec returns 0
- while(nRetries++ < maxRetries) {
+ while (nRetries++ < maxRetries) {
AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
result = RetrieveFile(system, filePath.Data(), localFileName.Data());
- if(!result)
+ if (!result)
{
- Log(detector, Form("GetFileName - Copy of file %s from %s FXS failed",
+ Log(detector, Form("GetFile - Copy of file %s from %s FXS failed",
filePath.Data(), GetSystemName(system)));
continue;
}
if (sizeComp != 0 || size != fileSize.Atoi())
{
- Log(detector, Form("GetFileName - size of file %s does not match with local copy!",
+ Log(detector, Form("GetFile - size of file %s does not match with local copy!",
filePath.Data()));
result = kFALSE;
continue;
{
// compare md5sum of local file with the one stored in the FXS DB
if(fileChecksum.Contains(' ')) fileChecksum.Resize(fileChecksum.First(' '));
- Int_t md5Comp = gSystem->Exec(Form("md5sum %s |grep %s 2>&1 > /dev/null",
+ Int_t md5Comp = gSystem->Exec(Form("md5sum %s |grep %s > /dev/null 2> /dev/null",
localFileName.Data(), fileChecksum.Data()));
if (md5Comp != 0)
{
- Log(detector, Form("GetFileName - md5sum of file %s does not match with local copy!",
+ Log(detector, Form("GetFile - md5sum of file %s does not match with local copy!",
filePath.Data()));
result = kFALSE;
continue;
if (result) break;
}
- if(!result) return 0;
+ if (!result)
+ {
+ fFXSError = system;
+ return 0;
+ }
fFXSCalled[system]=kTRUE;
TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, sourceName.Data()));
}
}
- TString baseFXSFolder;
- if (system == kDAQ)
- {
- baseFXSFolder = "FES/";
- }
- else if (system == kDCS)
- {
- baseFXSFolder = "";
- }
- else if (system == kHLT)
- {
- baseFXSFolder = "/opt/FXS/";
- }
-
-
- TString command = Form("scp -oPort=%d -2 %s@%s:%s%s %s",
+ TString command = Form("scp -oPort=%d -2 %s@%s:%s/%s %s",
fConfig->GetFXSPort(system),
fConfig->GetFXSUser(system),
fConfig->GetFXSHost(system),
- baseFXSFolder.Data(),
+ fConfig->GetFXSBaseFolder(system),
fxsFileName,
localFileName);
if (!Connect(system))
{
Log(detector, Form("GetFileSources - Couldn't connect to %s FXS database", GetSystemName(system)));
+ fFXSError = system;
return NULL;
}
if (!aResult) {
Log(detector, Form("GetFileSources - Can't execute SQL query to %s database for id: %s",
GetSystemName(system), id));
+ fFXSError = system;
return 0;
}
if (detName == "shuttle_done")
{
- // Send the information to ML
- TMonaLisaText mlStatus("SHUTTLE_status", "Done");
+ if (TouchFile()==kTRUE){
+ //Send the information to ML
+ TMonaLisaText mlStatus("SHUTTLE_status", "Done");
- TList mlList;
- mlList.Add(&mlStatus);
-
- TString mlID;
- mlID.Form("%d", GetCurrentRun());
- fMonaLisa->SendParameters(&mlList, mlID);
+ TList mlList;
+ mlList.Add(&mlStatus);
+
+ TString mlID;
+ mlID.Form("%d", GetCurrentRun());
+ fMonaLisa->SendParameters(&mlList, mlID);
+ }
+ else{
+ return kFALSE;
+ }
+
}
} else {
TString statusStr(status);
}
//______________________________________________________________________________________________
-Bool_t AliShuttle::SendMail()
+Bool_t AliShuttle::SendMail(EMailTarget target, Int_t system)
{
//
// sends a mail to the subdetector expert in case of preprocessor error
if (!fConfig->SendMail())
return kTRUE;
- TString to="";
- TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
- TObjString *anExpert=0;
- while ((anExpert = (TObjString*) iterExperts.Next()))
- {
- to += Form("%s,", anExpert->GetName());
- }
- if (to.Length() > 0)
- to.Remove(to.Length()-1);
- AliDebug(2, Form("to: %s",to.Data()));
-
- if (to.IsNull()) {
- Log("SHUTTLE", "List of detector responsibles not set!");
- return kFALSE;
+ if (target == kDCSEMail || target == kFXSEMail) {
+ if (!fFirstProcessing)
+ return kTRUE;
}
void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
gSystem->FreeDirectory(dir);
}
- TString bodyFileName;
- bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
- gSystem->ExpandPathName(bodyFileName);
-
- ofstream mailBody;
- mailBody.open(bodyFileName, ofstream::out);
-
- if (!mailBody.is_open())
+ // det experts in to
+ TString to="";
+ TIter *iterExperts;
+ if (target == kDCSEMail) {
+ iterExperts = new TIter(fConfig->GetAdmins(AliShuttleConfig::kAmanda));
+ }
+ else if (target == kFXSEMail) {
+ iterExperts = new TIter(fConfig->GetAdmins(system));
+ }
+ else {
+ iterExperts = new TIter(fConfig->GetResponsibles(fCurrentDetector));
+ }
+ TObjString *anExpert=0;
+ while ((anExpert = (TObjString*) iterExperts->Next()))
{
- Log("SHUTTLE", Form("Could not open mail body file %s", bodyFileName.Data()));
- return kFALSE;
- }
+ to += Form("%s,", anExpert->GetName());
+ }
+ delete iterExperts;
+ if (to.Length() > 0)
+ to.Remove(to.Length()-1);
+ AliDebug(2, Form("to: %s",to.Data()));
+ if (to.IsNull()) {
+ Log("SHUTTLE", Form("List of %d responsibles not set!", (Int_t) target));
+ return kFALSE;
+ }
+
+ // SHUTTLE responsibles in cc
TString cc="";
TIter iterAdmins(fConfig->GetAdmins(AliShuttleConfig::kGlobal));
TObjString *anAdmin=0;
cc.Remove(cc.Length()-1);
AliDebug(2, Form("cc: %s",to.Data()));
- TString subject = Form("%s Shuttle preprocessor FAILED in run %d (run type = %s)!",
- fCurrentDetector.Data(), GetCurrentRun(), GetRunType());
- AliDebug(2, Form("subject: %s", subject.Data()));
-
- TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
- body += Form("SHUTTLE just detected that your preprocessor "
- "failed processing run %d (run type = %s)!!\n\n",
- GetCurrentRun(), GetRunType());
- body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n",
- fCurrentDetector.Data());
- if (fConfig->GetRunMode() == AliShuttleConfig::kTest)
- {
- body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
- } else {
- body += Form("\thttp://pcalimonitor.cern.ch/shuttle.jsp?instance=PROD&time=168 \n\n");
- }
-
-
- TString logFolder = "logs";
- if (fConfig->GetRunMode() == AliShuttleConfig::kProd)
- logFolder += "_PROD";
-
-
- body += Form("Find the %s log for the current run on \n\n"
- "\thttp://pcalishuttle01.cern.ch:8880/%s/%d/%s_%d.log \n\n",
- fCurrentDetector.Data(), logFolder.Data(), GetCurrentRun(),
- fCurrentDetector.Data(), GetCurrentRun());
- body += Form("The last 10 lines of %s log file are following:\n\n", fCurrentDetector.Data());
-
- AliDebug(2, Form("Body begin: %s", body.Data()));
-
- mailBody << body.Data();
- mailBody.close();
- mailBody.open(bodyFileName, ofstream::out | ofstream::app);
-
- TString logFileName = Form("%s/%d/%s_%d.log", GetShuttleLogDir(),
- GetCurrentRun(), fCurrentDetector.Data(), GetCurrentRun());
- TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
- if (gSystem->Exec(tailCommand.Data()))
- {
- mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
- }
-
- TString endBody = Form("------------------------------------------------------\n\n");
- endBody += Form("In case of problems please contact the SHUTTLE core team.\n\n");
- endBody += "Please do not answer this message directly, it is automatically generated.\n\n";
- endBody += "Greetings,\n\n \t\t\tthe SHUTTLE\n";
-
- AliDebug(2, Form("Body end: %s", endBody.Data()));
-
- mailBody << endBody.Data();
-
- mailBody.close();
-
- // send mail!
- TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
- subject.Data(),
- cc.Data(),
- to.Data(),
- bodyFileName.Data());
- AliDebug(2, Form("mail command: %s", mailCommand.Data()));
-
- Bool_t result = gSystem->Exec(mailCommand.Data());
-
- return result == 0;
-}
-
-//______________________________________________________________________________________________
-Bool_t AliShuttle::SendMailToDCS()
-{
- //
- // sends a mail to the DCS Amanda experts in case of DCS data point retrieval error
- //
-
- if (fTestMode != kNone)
- return kTRUE;
-
- if (!fConfig->SendMail())
- return kTRUE;
-
- if (!fFirstProcessing)
- return kTRUE;
-
- void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
- if (dir == NULL)
- {
- if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
- {
- Log("SHUTTLE", Form("SendMailToDCS - Can't open directory <%s>", GetShuttleLogDir()));
- return kFALSE;
- }
-
- } else {
- gSystem->FreeDirectory(dir);
- }
-
+ // mail body
TString bodyFileName;
bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
gSystem->ExpandPathName(bodyFileName);
if (!mailBody.is_open())
{
- Log("SHUTTLE", Form("SendMailToDCS - Could not open mail body file %s", bodyFileName.Data()));
+ Log("SHUTTLE", Form("Could not open mail body file %s", bodyFileName.Data()));
return kFALSE;
}
- TString to="";
- TIter iterExperts(fConfig->GetAdmins(AliShuttleConfig::kAmanda));
- TObjString *anExpert=0;
- while ((anExpert = (TObjString*) iterExperts.Next()))
- {
- to += Form("%s,", anExpert->GetName());
- }
- if (to.Length() > 0)
- to.Remove(to.Length()-1);
- AliDebug(2, Form("to: %s",to.Data()));
- if (to.IsNull()) {
- Log("SHUTTLE", "List of Amanda server administrators not set!");
- return kFALSE;
- }
+ TString subject;
+ TString body;
- TString cc="";
- TIter iterAdmins(fConfig->GetAdmins(AliShuttleConfig::kGlobal));
- TObjString *anAdmin=0;
- while ((anAdmin = (TObjString*) iterAdmins.Next()))
- {
- cc += Form("%s,", anAdmin->GetName());
+ if (target == kDCSEMail){
+ subject = Form("Retrieval of data points for %s FAILED in run %d !",
+ fCurrentDetector.Data(), GetCurrentRun());
+ AliDebug(2, Form("subject: %s", subject.Data()));
+
+ body = Form("Dear DCS experts, \n\n");
+ body += Form("SHUTTLE couldn\'t retrieve the data points for detector %s "
+ "in run %d!!\n\n", fCurrentDetector.Data(), GetCurrentRun());
}
- if (cc.Length() > 0)
- cc.Remove(cc.Length()-1);
- AliDebug(2, Form("cc: %s",to.Data()));
-
- TString subject = Form("Retrieval of data points for %s FAILED in run %d !",
+ else if (target == kFXSEMail){
+ subject = Form("FXS communication for %s FAILED in run %d !",
fCurrentDetector.Data(), GetCurrentRun());
- AliDebug(2, Form("subject: %s", subject.Data()));
+ AliDebug(2, Form("subject: %s", subject.Data()));
+ TString sys;
+ if (system == kDAQ) sys="DAQ";
+ else if (system == kDCS) sys="DCS";
+ else if (system == kHLT) sys="HLT";
+ else return kFALSE;
+ body = Form("Dear %s FXS experts, \n\n",sys.Data());
+ body += Form("SHUTTLE couldn\'t retrieve data from the FXS for detector %s "
+ "in run %d!!\n\n", fCurrentDetector.Data(), GetCurrentRun());
+ }
+ else {
+ subject = Form("%s Shuttle preprocessor FAILED in run %d (run type = %s)!",
+ fCurrentDetector.Data(), GetCurrentRun(), GetRunType());
+ AliDebug(2, Form("subject: %s", subject.Data()));
+
+ body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
+ body += Form("SHUTTLE just detected that your preprocessor "
+ "failed processing run %d (run type = %s)!!\n\n",
+ GetCurrentRun(), GetRunType());
+ }
- TString body = Form("Dear DCS experts, \n\n");
- body += Form("SHUTTLE couldn\'t retrieve the data points for detector %s "
- "in run %d!!\n\n", fCurrentDetector.Data(), GetCurrentRun());
body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n",
fCurrentDetector.Data());
if (fConfig->GetRunMode() == AliShuttleConfig::kTest)
{
body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
} else {
- body += Form("\thttp://pcalimonitor.cern.ch/shuttle.jsp?instance=PROD?time=168 \n\n");
+ body += Form("\thttp://pcalimonitor.cern.ch/shuttle.jsp?instance=PROD&time=168 \n\n");
}
-
+
+
TString logFolder = "logs";
if (fConfig->GetRunMode() == AliShuttleConfig::kProd)
logFolder += "_PROD";
mailBody.close();
mailBody.open(bodyFileName, ofstream::out | ofstream::app);
- TString logFileName = Form("%s/%d/%s_%d.log", GetShuttleLogDir(), GetCurrentRun(),
- fCurrentDetector.Data(), GetCurrentRun());
+ TString logFileName = Form("%s/%d/%s_%d.log", GetShuttleLogDir(),
+ GetCurrentRun(), fCurrentDetector.Data(), GetCurrentRun());
TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
if (gSystem->Exec(tailCommand.Data()))
{
Bool_t AliShuttle::GetHLTStatus()
{
// Return HLT status (ON=1 OFF=0)
- // Converts the HLT status from the status string read in the run logbook (not just a bool)
+ // Converts the "HLTMode" run parameter of the logbook entry (a string, not just a bool) to a bool: first character "A" -> kFALSE, "B".."E" -> kTRUE, anything else -> kFALSE plus a log message
if(!fLogbookEntry) {
AliError("No logbook entry!");
return 0;
}
- // TODO implement when HLTStatus is inserted in run logbook
- //TString hltStatus = fLogbookEntry->GetRunParameter("HLTStatus");
- //if(hltStatus == "OFF") {return kFALSE};
-
- return kTRUE;
+ // Reads the "HLTMode" run parameter; only its first character is evaluated below. NOTE(review): the former TODO about HLTMode being inserted in the run logbook looks resolved by this code - confirm
+ TString hltMode = fLogbookEntry->GetRunParameter("HLTMode");
+ TSubString firstChar = hltMode(0,1); // sub-string of length 1 starting at index 0
+ AliDebug(2,Form("First char = %s ",firstChar.Data()));
+ if (firstChar == "A") { // mode starting with "A": HLT reported as OFF
+ return kFALSE;
+ }
+ else if ((firstChar == "B") || (firstChar == "C") || (firstChar == "D") || (firstChar == "E")) { // modes starting with "B".."E": HLT reported as ON
+ return kTRUE;
+ }
+ else { // any other (or empty) mode string is treated as OFF and logged
+ Log("SHUTTLE","Unexpected HLT mode! Returning 0....");
+ return kFALSE;
+ }
}
-
//______________________________________________________________________________________________
void AliShuttle::SetShuttleTempDir(const char* tmpDir)
{
fgkShuttleLogDir = gSystem->ExpandPathName(logDir);
}
+//______________________________________________________________________________________________
+Bool_t AliShuttle::TouchFile()
+{
+	//
+	// Touches the marker file
+	//   /alice/data/<year>/<LHC period>/SHUTTLE_DONE/shuttle_done_<run>
+	// on the Grid once the current run has been DONE.
+	//
+	// Return value:
+	//   kTRUE  - the Grid "touch" command reported "__result__" == "1"
+	//   kFALSE - no Grid connection, no usable command result, or a result
+	//            different from "1" (the reason is written to the SHUTTLE log)
+	//
+
+	if (!gGrid)
+	{
+		Log("SHUTTLE",Form("No TGrid connection estabilished!"));
+		Log("SHUTTLE",Form("Could not touch file for run %i",GetCurrentRun()));
+		return kFALSE;
+	}
+
+	TString command;
+	command.Form("touch /alice/data/%d/%s/SHUTTLE_DONE/shuttle_done_%i", GetCurrentYear(), GetLHCPeriod(),GetCurrentRun());
+
+	// The result object handed back by the Grid command is not released by
+	// anybody else in this function, so it is deleted below on every path
+	// (previously it was leaked once per call).
+	TGridResult *resultTouch = dynamic_cast<TGridResult*>(gGrid->Command(command));
+	Bool_t touched = kFALSE;
+
+	if (resultTouch){
+		// the first entry of the result is expected to be a map holding "__result__"
+		TMap *mapTouch = dynamic_cast<TMap*>(resultTouch->At(0));
+		if (mapTouch){
+			TObjString *valueTouch = dynamic_cast<TObjString*>(mapTouch->GetValue("__result__"));
+			if (valueTouch){
+				if (valueTouch->GetString()=="1"){
+					touched = kTRUE;
+				}
+				else {
+					// reached when "__result__" is present but differs from "1"
+					Log("SHUTTLE",Form("No value for __result__ key set in the map for touching command"));
+				}
+			}
+			else {
+				// reached when the map has no TObjString stored under "__result__"
+				Log("SHUTTLE",Form("No value set in the map for touching command"));
+			}
+		}
+		else {
+			Log("SHUTTLE",Form("No map for touching command"));
+		}
+	}
+	else {
+		Log("SHUTTLE",Form("No result for touching command"));
+	}
+
+	// mapTouch and valueTouch were obtained from resultTouch, therefore the
+	// result is deleted only after they have been evaluated (delete of 0 is a no-op)
+	delete resultTouch;
+
+	if (touched)
+		return kTRUE;
+
+	Log("SHUTTLE",Form("Could not touch file for run %i",GetCurrentRun()));
+	return kFALSE;
+}
+
+