/*
$Log$
+Revision 1.81 2007/12/20 14:24:59 jgrosseo
+Do not increase count in case of StoreError
+
+Revision 1.80 2007/12/20 13:31:28 acolla
+Bug fix (Jan Fiete): recovering from StoreError, if the store to OCDB is successful,
+the Shuttle sets current detector's status=done
+
+Revision 1.79 2007/12/19 14:03:01 acolla
+
+detector name to build the lhcPeriod_DET is to be looked in "detector" column, not "partition"
+
+Revision 1.78 2007/12/19 11:50:41 acolla
+
+Raw data tag merged files are written in /alice/data/.../lhcPeriod_DET/runNb/raw if the partition is made of DET only
+
+Revision 1.77 2007/12/19 11:16:16 acolla
+More meaningful log message added in GetFileSources
+
+Revision 1.76 2007/12/19 07:45:20 acolla
+bug fix in the name of the raw tag files (Raw instead of raw)
+
+Revision 1.75 2007/12/18 15:42:14 jgrosseo
+adding number of open runs to monitoring
+
+Revision 1.74 2007/12/17 03:23:32 jgrosseo
+several bugfixes
+added "empty preprocessor" as placeholder for Acorde in FDR
+
+Revision 1.73 2007/12/14 19:31:36 acolla
+Sending email to DCS experts is temporarily commented
+
+Revision 1.72 2007/12/13 15:44:28 acolla
+Run type added in mail sent to detector expert (eases understanding)
+
+Revision 1.71 2007/12/12 14:56:14 jgrosseo
+sending shuttle_ignore to ML also in case of 0 events
+
+Revision 1.70 2007/12/12 13:45:35 acolla
+Monalisa started in Collect() function. Alive message to monitor is sent at each Collect and every minute during preprocessor processing.
+
+Revision 1.69 2007/12/12 10:06:29 acolla
+in AliShuttle.cxx: SHUTTLE logbook is updated in case of invalid run times:
+
+time_start==0 && time_end==0
+
+logbook is NOT updated if time_start != 0 && time_end == 0, because it may mean that the run is still ongoing.
+
+Revision 1.68 2007/12/11 10:15:17 acolla
+Added marking SHUTTLE=DONE for invalid runs
+(invalid start time or end time) and runs with totalEvents < 1
+
+Revision 1.67 2007/12/07 19:14:36 acolla
+in AliShuttleTrigger:
+
+Added automatic collection of new runs on a regular time basis (settable from the configuration)
+
+in AliShuttleConfig: new members
+
+- triggerWait: time to wait for DIM trigger (s) before starting automatic collection of new runs
+- mode: run mode (test, prod) -> used to build log folder (logs or logs_PROD)
+
+in AliShuttle:
+
+- logs now stored in logs/#RUN/DET_#RUN.log
+
+Revision 1.66 2007/12/05 10:45:19 jgrosseo
+changed order of arguments to TMonaLisaWriter
+
+Revision 1.65 2007/11/26 16:58:37 acolla
+Monalisa configuration added: host and table name
+
+Revision 1.64 2007/11/13 16:15:47 acolla
+DCS map is stored in a file in the temp folder where the detector is processed.
+If the preprocessor fails, the temp folder is not removed. This will help the debugging of the problem.
+
+Revision 1.63 2007/11/02 10:53:16 acolla
+Protection added to AliShuttle::CopyFileLocally
+
+Revision 1.62 2007/10/31 18:23:13 acolla
+Further development on the Shuttle:
+
+- Shuttle now connects to the Grid as alidaq. The OCDB and Reference folders
+are now built from /alice/data, e.g.:
+/alice/data/2007/LHC07a/OCDB
+
+the year and LHC period are taken from the Shuttle.
+Raw metadata files are stored by GRP to:
+/alice/data/2007/LHC07a/<runNb>/Raw/RunMetadata.root
+
+- Shuttle sends a mail to DCS experts each time DP retrieval fails.
+
+Revision 1.61 2007/10/30 20:33:51 acolla
+Improved managing of temporary folders, which weren't correctly handled.
+Resolved bug introduced in StoreReferenceFile, which caused SPD preprocessor fail.
+
Revision 1.60 2007/10/29 18:06:16 acolla
New function StoreRunMetadataFile added to preprocessor and Shuttle interface
Log("SHUTTLE", Form("StoreOCDB - %s: object %s has validity infinite but "
"there are previous unprocessed runs!",
fCurrentDetector.Data(), aLocId.GetPath().Data()));
+ result = kFALSE;
continue;
}
TString localBaseFolder = sto->GetBaseFolder();
// Build Run level folder
- // folder = /alice/data/year/lhcPeriod/runNb/Raw
+ // folder = /alice/data/year/lhcPeriod/runNb/raw
- TTimeStamp startTime(GetCurrentStartTime());
-
- TString year = Form("%d",startTime.GetDate());
- year = year(0,4);
- TString lhcPeriod = GetRunParameter("LHCperiod");
-
+ TString lhcPeriod = GetLHCPeriod();
if (lhcPeriod.Length() == 0)
{
Log("SHUTTLE","StoreRunMetaDataFile - LHCPeriod not found in logbook!");
return 0;
}
- // TODO: currently SHUTTLE cannot write in /alice/data/ !!!!!
- //TString target = Form("%s/GRP/RunMetadata/alice/data/%s/%s/%d/Raw/%s",
- // localBaseFolder.Data(), year.Data(),
- // lhcPeriod.Data(), GetCurrentRun(), gridFileName);
+ // TODO partitions with one detector only write data into LHCperiod_DET
+ TString partition = GetRunParameter("detector");
- TString target = Form("%s/GRP/RunMetadata/alice/simulation/%s/%s/%d/Raw/%s",
- localBaseFolder.Data(), year.Data(),
+ if (partition.Length() > 0 && partition != "ALICE")
+ {
+ lhcPeriod.Append(Form("_%s", partition.Data()));
+ Log(fCurrentDetector, Form("Run data tags merged file will be written in %s",
+ lhcPeriod.Data()));
+ }
+
+ TString target = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/raw/%s",
+ localBaseFolder.Data(), GetCurrentYear(),
lhcPeriod.Data(), GetCurrentRun(), gridFileName);
-
-
return CopyFileLocally(localFile, target);
}
void* dir = gSystem->OpenDirectory(targetDir.Data());
if (dir == NULL) {
if (gSystem->mkdir(targetDir.Data(), kTRUE)) {
- Log("SHUTTLE", Form("StoreFileLocally - Can't open directory <%s>", targetDir.Data()));
+ Log("SHUTTLE", Form("CopyFileLocally - Can't open directory <%s>", targetDir.Data()));
return kFALSE;
}
gSystem->FreeDirectory(dir);
}
- Int_t result = gSystem->GetPathInfo(localFile, 0, (Long64_t*) 0, 0, 0);
+ Int_t result = 0;
+
+ result = gSystem->GetPathInfo(localFile, 0, (Long64_t*) 0, 0, 0);
if (result)
{
- Log("SHUTTLE", Form("StoreFileLocally - %s does not exist", localFile));
+ Log("SHUTTLE", Form("CopyFileLocally - %s does not exist", localFile));
return kFALSE;
}
+ result = gSystem->GetPathInfo(target, 0, (Long64_t*) 0, 0, 0);
+ if (!result)
+ {
+ Log("SHUTTLE", Form("CopyFileLocally - target file %s already exist, removing...", target.Data()));
+ if (gSystem->Unlink(target.Data()))
+ {
+ Log("SHUTTLE", Form("CopyFileLocally - Could not remove existing target file %s!", target.Data()));
+ return kFALSE;
+ }
+ }
+
result = gSystem->CopyFile(localFile, target);
if (result == 0)
{
- Log("SHUTTLE", Form("StoreFileLocally - File %s stored locally to %s", localFile, target.Data()));
+ Log("SHUTTLE", Form("CopyFileLocally - File %s stored locally to %s", localFile, target.Data()));
return kTRUE;
}
else
{
- Log("SHUTTLE", Form("StoreFileLocally - Could not store file %s to %s! Error code = %d",
+ Log("SHUTTLE", Form("CopyFileLocally - Could not store file %s to %s! Error code = %d",
localFile, target.Data(), result));
return kFALSE;
}
}
else if (strcmp(type, "metadata") == 0)
{
- TTimeStamp startTime(GetCurrentStartTime());
-
- TString year = Form("%d",startTime.GetDate());
- year = year(0,4);
- TString lhcPeriod = GetRunParameter("LHCperiod");
+ TString lhcPeriod = GetLHCPeriod();
if (lhcPeriod.Length() == 0)
{
return 0;
}
- // TODO: currently SHUTTLE cannot write in /alice/data/ !!!!!
- //dir = Form("%s/GRP/RunMetadata/alice/data/%s/%s/%d/Raw",
- // localBaseFolder.Data(), year.Data(),
- // lhcPeriod.Data(), GetCurrentRun());
- //alienDir = dir(dir.Index("/alice/data/"), dir.Length());
+ // TODO partitions with one detector only write data into LHCperiod_DET
+ TString partition = GetRunParameter("detector");
+
+ if (partition.Length() > 0 && partition != "ALICE")
+ {
+ lhcPeriod.Append(Form("_%s", partition.Data()));
+ }
- dir = Form("%s/GRP/RunMetadata/alice/simulation/%s/%s/%d/Raw",
- localBaseFolder.Data(), year.Data(),
+ dir = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/raw",
+ localBaseFolder.Data(), GetCurrentYear(),
lhcPeriod.Data(), GetCurrentRun());
- alienDir = dir(dir.Index("/alice/simulation/"), dir.Length());
+ alienDir = dir(dir.Index("/alice/data/"), dir.Length());
+
begin = "";
}
else
if (!result->GetFileName(1)) // TODO: It looks like element 0 is always 0!!
{
+ // TODO It does not work currently! Bug in TAliEn::Mkdir
+ // TODO Manually fixed in local root v5-16-00
if (!gGrid->Mkdir(alienDir.Data(),"-p",0))
{
Log("SHUTTLE", Form("CopyFilesToGrid - Cannot create directory %s",
mlList.Add(&mlStatus);
mlList.Add(&mlRetryCount);
- fMonaLisa->SendParameters(&mlList);
+ TString mlID;
+ mlID.Form("%d", GetCurrentRun());
+ fMonaLisa->SendParameters(&mlList, mlID);
}
//______________________________________________________________________________________________
return kFALSE;
}
- if (status->GetStatus() == AliShuttleStatus::kStoreError) {
+ if (status->GetStatus() == AliShuttleStatus::kStoreStarted || status->GetStatus() == AliShuttleStatus::kStoreError) {
Log("SHUTTLE",
Form("ContinueProcessing - %s: Grid storage of one or more "
"objects failed. Trying again now",
"successfully stored into main storage",
fCurrentDetector.Data()));
UpdateShuttleStatus(AliShuttleStatus::kDone);
- UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE");
+ UpdateShuttleLogbook(fCurrentDetector, "DONE");
} else {
Log("SHUTTLE",
Form("ContinueProcessing - %s: Grid storage failed again",
if (status->GetStatus() == AliShuttleStatus::kDCSError ||
status->GetStatus() == AliShuttleStatus::kDCSStarted)
increaseCount = kFALSE;
+
UpdateShuttleStatus(AliShuttleStatus::kStarted, increaseCount);
cont = kTRUE;
}
Log("SHUTTLE", Form("\t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^*",
GetCurrentRun()));
- // create ML instance that monitors this run
- fMonaLisa = new TMonaLisaWriter(Form("%d", GetCurrentRun()), "SHUTTLE", "aliendb1.cern.ch");
- // disable monitoring of other parameters that come e.g. from TFile
- gMonitoringWriter = 0;
-
// Send the information to ML
TMonaLisaText mlStatus("SHUTTLE_status", "Processing");
TMonaLisaText mlRunType("SHUTTLE_runtype", Form("%s (%s)", entry->GetRunType(), entry->GetRunParameter("log")));
mlList.Add(&mlStatus);
mlList.Add(&mlRunType);
- fMonaLisa->SendParameters(&mlList);
+ TString mlID;
+ mlID.Form("%d", GetCurrentRun());
+ fMonaLisa->SendParameters(&mlList, mlID);
if (fLogbookEntry->IsDone())
{
// Initialization
Bool_t hasError = kFALSE;
- AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
- if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
- AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
- if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
-
+ // Set the CDB and Reference folders according to the year and LHC period
+ TString lhcPeriod(GetLHCPeriod());
+ if (lhcPeriod.Length() == 0)
+ {
+ Log("SHUTTLE","Process - LHCPeriod not found in logbook!");
+ return 0;
+ }
+
+ if (fgkMainCDB.Length() == 0)
+ fgkMainCDB = Form("alien://folder=/alice/data/%d/%s/OCDB?user=alidaq?cacheFold=/tmp/OCDBCache",
+ GetCurrentYear(), lhcPeriod.Data());
+
+ if (fgkMainRefStorage.Length() == 0)
+ fgkMainRefStorage = Form("alien://folder=/alice/data/%d/%s/Reference?user=alidaq?cacheFold=/tmp/OCDBCache",
+ GetCurrentYear(), lhcPeriod.Data());
+
// Loop on detectors in the configuration
TIter iter(fConfig->GetDetectors());
TObjString* aDetector = 0;
+ Bool_t first = kTRUE;
+
while ((aDetector = (TObjString*) iter.Next()))
{
fCurrentDetector = aDetector->String();
if (ContinueProcessing() == kFALSE) continue;
+
+ if (first)
+ {
+ // only read QueryCDB when needed and only once
+ AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
+ if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
+ AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
+ if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
+ first = kFALSE;
+ }
Log("SHUTTLE", Form("\t\t\t****** run %d - %s: START ******",
GetCurrentRun(), aDetector->GetName()));
}
if (expiredTime % 60 == 0)
+ {
Log("SHUTTLE", Form("Process - %s: Checking process. "
"Run time: %d seconds - Memory consumption: %d KB",
fCurrentDetector.Data(), expiredTime, mem));
+ SendAlive();
+ }
if (mem > fConfig->GetPPMaxMem())
{
}
TString wd = gSystem->WorkingDirectory();
- TString tmpDir = Form("%s/%s_process", GetShuttleTempDir(), fCurrentDetector.Data());
+ TString tmpDir = Form("%s/%s_%d_process", GetShuttleTempDir(),
+ fCurrentDetector.Data(), GetCurrentRun());
Int_t result = gSystem->GetPathInfo(tmpDir.Data(), 0, (Long64_t*) 0, 0, 0);
if (!result) // temp dir already exists!
Log(fCurrentDetector.Data(),
Form("Process - %s dir already exists! Removing...", tmpDir.Data()));
gSystem->Exec(Form("rm -rf %s",tmpDir.Data()));
- } else {
- if (gSystem->mkdir(tmpDir.Data(), 1))
- {
- Log(fCurrentDetector.Data(), "Process - could not make temp directory!!");
- gSystem->Exit(1);
- }
+ }
+
+ if (gSystem->mkdir(tmpDir.Data(), 1))
+ {
+ Log(fCurrentDetector.Data(), "Process - could not make temp directory!!");
+ gSystem->Exit(1);
}
if (!gSystem->ChangeDirectory(tmpDir.Data()))
Bool_t success = ProcessCurrentDetector();
gSystem->ChangeDirectory(wd.Data());
-
- gSystem->Exec(Form("rm -rf %s",tmpDir.Data()));
-
+
if (success) // Preprocessor finished successfully!
{
+ // remove temporary folder
+ // temporary commented (JF)
+ //gSystem->Exec(Form("rm -rf %s",tmpDir.Data()));
+
// Update time_processed field in FXS DB
if (UpdateTable() == kFALSE)
Log("SHUTTLE", Form("Process - %s: Could not update FXS databases!",
TObjArray checkEntryArray;
checkEntryArray.SetOwner(1);
TString whereClause = Form("where run=%d", GetCurrentRun());
- if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || checkEntryArray.GetEntries() == 0) {
+ if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) ||
+ checkEntryArray.GetEntries() == 0) {
Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!",
GetCurrentRun()));
return hasError == kFALSE;
}
}
- // remove ML instance
- delete fMonaLisa;
- fMonaLisa = 0;
-
fLogbookEntry = 0;
return hasError == kFALSE;
{
Log(fCurrentDetector,
Form("ProcessCurrentDetector -"
- " Error retrieving DCS aliases from server %s",
- host.Data()));
+ " Error retrieving DCS aliases from server %s."
+ " Sending mail to DCS experts!", host.Data()));
UpdateShuttleStatus(AliShuttleStatus::kDCSError);
+
+ //if (!SendMailToDCS())
+ // Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!"));
+
delete dcsMap;
return kFALSE;
}
{
Log(fCurrentDetector,
Form("ProcessCurrentDetector -"
- " Error retrieving DCS data points from server %s",
- host.Data()));
+ " Error retrieving DCS data points from server %s."
+ " Sending mail to DCS experts!", host.Data()));
UpdateShuttleStatus(AliShuttleStatus::kDCSError);
+
+ //if (!SendMailToDCS())
+ // Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!"));
+
if (aliasMap) delete aliasMap;
delete dcsMap;
return kFALSE;
}
}
+ // save map into file, to help debugging in case of preprocessor error
+ TFile* f = TFile::Open("DCSMap.root","recreate");
+ f->cd();
+ dcsMap->Write("DCSMap", TObject::kSingleKey);
+ f->Close();
+ delete f;
+
// DCS Archive DB processing successful. Call Preprocessor!
UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
return kTRUE;
}
+//______________________________________________________________________________________________
+void AliShuttle::CountOpenRuns()
+{
+	// Counts the rows of the Shuttle logbook table with shuttle_done=0 and
+	// sends the number to MonaLisa as "SHUTTLE_openruns" (id __PROCESSINGINFO__).
+	// On any connection/query problem the function returns without sending anything.
+
+	// check connection, connect if needed
+	if (!Connect(3))
+		return;
+
+	TString sqlQuery;
+	sqlQuery = Form("select count(*) from %s where shuttle_done=0", fConfig->GetShuttlelbTable());
+
+	TSQLResult* aResult = fServer[3]->Query(sqlQuery);
+	if (!aResult) {
+		AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
+		return;
+	}
+
+	AliDebug(2,Form("Query = %s", sqlQuery.Data()));
+
+	// From here on the result set is owned by this function:
+	// every exit path has to delete it, otherwise it leaks at each call
+	if (aResult->GetRowCount() == 0) {
+		AliError(Form("No result for query %s received", sqlQuery.Data()));
+		delete aResult;
+		return;
+	}
+
+	if (aResult->GetFieldCount() != 1) {
+		AliError(Form("Invalid field count for query %s received", sqlQuery.Data()));
+		delete aResult;
+		return;
+	}
+
+	TSQLRow* aRow = aResult->Next();
+	if (!aRow) {
+		AliError(Form("Could not receive result of query %s", sqlQuery.Data()));
+		delete aResult;
+		return;
+	}
+
+	// the single field of the single row holds the count as text
+	TString result(aRow->GetField(0), aRow->GetFieldLength(0));
+	Int_t count = result.Atoi();
+
+	Log("SHUTTLE", Form("%d unprocessed runs", count));
+
+	delete aRow;
+	delete aResult;
+
+	TMonaLisaValue mlStatus("SHUTTLE_openruns", count);
+
+	TList mlList;
+	mlList.Add(&mlStatus);
+
+	fMonaLisa->SendParameters(&mlList, "__PROCESSINGINFO__");
+}
+
//______________________________________________________________________________________________
Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
TObjArray& entries)
entries.SetOwner(1);
// check connection, in case connect
- if(!Connect(3)) return kFALSE;
+ if (!Connect(3)) return kFALSE;
TString sqlQuery;
sqlQuery = Form("select * from %s %s order by run", fConfig->GetShuttlelbTable(), whereClause);
UInt_t startTime = entry->GetStartTime();
UInt_t endTime = entry->GetEndTime();
- if (!startTime || !endTime || startTime > endTime) {
+// if (!startTime || !endTime || startTime > endTime)
+// {
+// Log("SHUTTLE",
+// Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d. Skipping!",
+// run, startTime, endTime));
+//
+// Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
+// fLogbookEntry = entry;
+// if (!UpdateShuttleLogbook("shuttle_done"))
+// {
+// AliError(Form("Could not update logbook for run %d !", run));
+// }
+// fLogbookEntry = 0;
+//
+// delete entry;
+// delete aRow;
+// delete aResult;
+// return 0;
+// }
+
+ if (!startTime)
+ {
Log("SHUTTLE",
- Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d",
- run, startTime, endTime));
+ Form("QueryRunParameters - Invalid parameters for Run %d: "
+ "startTime = %d, endTime = %d. Skipping!",
+ run, startTime, endTime));
+
+ Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
+ fLogbookEntry = entry;
+ if (!UpdateShuttleLogbook("shuttle_ignored"))
+ {
+ AliError(Form("Could not update logbook for run %d !", run));
+ }
+ fLogbookEntry = 0;
+
+ delete entry;
+ delete aRow;
+ delete aResult;
+ return 0;
+ }
+
+ if (startTime && !endTime)
+ {
+ // TODO Here we don't mark SHUTTLE done, because this may mean
+ //the run is still ongoing!!
+ Log("SHUTTLE",
+ Form("QueryRunParameters - Invalid parameters for Run %d: "
+ "startTime = %d, endTime = %d. Skipping (Shuttle won't be marked as DONE)!",
+ run, startTime, endTime));
+
+ //Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
+ //fLogbookEntry = entry;
+ //if (!UpdateShuttleLogbook("shuttle_done"))
+ //{
+ // AliError(Form("Could not update logbook for run %d !", run));
+ //}
+ //fLogbookEntry = 0;
+
+ delete entry;
+ delete aRow;
+ delete aResult;
+ return 0;
+ }
+
+ if (startTime && endTime && (startTime > endTime))
+ {
+ Log("SHUTTLE",
+ Form("QueryRunParameters - Invalid parameters for Run %d: "
+ "startTime = %d, endTime = %d. Skipping!",
+ run, startTime, endTime));
+
+ Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
+ fLogbookEntry = entry;
+ if (!UpdateShuttleLogbook("shuttle_ignored"))
+ {
+ AliError(Form("Could not update logbook for run %d !", run));
+ }
+ fLogbookEntry = 0;
+
+ delete entry;
+ delete aRow;
+ delete aResult;
+ return 0;
+ }
+
+ TString totEventsStr = entry->GetRunParameter("totalEvents");
+ Int_t totEvents = totEventsStr.Atoi();
+ if (totEvents < 1)
+ {
+ Log("SHUTTLE",
+ Form("QueryRunParameters - Run %d has 0 events - Skipping!", run));
+
+ Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
+ fLogbookEntry = entry;
+ if (!UpdateShuttleLogbook("shuttle_ignored"))
+ {
+ AliError(Form("Could not update logbook for run %d !", run));
+ }
+ fLogbookEntry = 0;
+
delete entry;
delete aRow;
delete aResult;
filePath.Data(), fileSize.Data(), fileChecksum.Data()));
// retrieved file is renamed to make it unique
- TString localFileName = Form("%s/%s_process/%s_%s_%d_%s_%s.shuttle",
- GetShuttleTempDir(), detector,
+ TString localFileName = Form("%s/%s_%d_process/%s_%s_%d_%s_%s.shuttle",
+ GetShuttleTempDir(), detector, GetCurrentRun(),
GetSystemName(system), detector, GetCurrentRun(),
id, sourceName.Data());
TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, sourceName.Data()));
fFXSlist[system].Add(fileParams);
- static TString staticLocalFileName = localFileName;
- //fullLocalFileName.Form("%s/%s_process/%s", GetShuttleTempDir(), detector, localFileName.Data());
-
+ static TString staticLocalFileName;
+ staticLocalFileName.Form("%s", localFileName.Data());
+
Log(fCurrentDetector, Form("GetFile - Retrieved file with id %s and "
"source %s from %s to %s", id, source,
GetSystemName(system), localFileName.Data()));
-
+
return staticLocalFileName.Data();
}
// if id is NULL all sources are returned (distinct)
//
- Log(detector, Form("GetFileSources - Retrieving sources with id %s from %s", id, GetSystemName(system)));
+ if (id)
+ {
+ Log(detector, Form("GetFileSources - Querying %s FXS for files with id %s produced by %s", GetSystemName(system), id, detector));
+ } else {
+ Log(detector, Form("GetFileSources - Querying %s FXS for files produced by %s", GetSystemName(system), detector));
+ }
// check if test mode should simulate a FXS error
if (fTestMode & kErrorFXSSources)
TString detName(detector);
TString setClause;
- if(detName == "shuttle_done")
+ if (detName == "shuttle_done" || detName == "shuttle_ignored")
{
setClause = "set shuttle_done=1";
- // Send the information to ML
- TMonaLisaText mlStatus("SHUTTLE_status", "Done");
-
- TList mlList;
- mlList.Add(&mlStatus);
+ if (detName == "shuttle_done")
+ {
+ // Send the information to ML
+ TMonaLisaText mlStatus("SHUTTLE_status", "Done");
- fMonaLisa->SendParameters(&mlList);
+ TList mlList;
+ mlList.Add(&mlStatus);
+
+ TString mlID;
+ mlID.Form("%d", GetCurrentRun());
+ fMonaLisa->SendParameters(&mlList, mlID);
+ }
} else {
TString statusStr(status);
if(statusStr.Contains("done", TString::kIgnoreCase) ||
return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
}
+//______________________________________________________________________________________________
+UInt_t AliShuttle::GetCurrentYear() const
+{
+	//
+	// Returns the year of the current run's start time (GetCurrentStartTime()).
+	// Returns 0 when no logbook entry is set.
+	//
+
+	if (!fLogbookEntry) return 0;
+
+	// GetDate() packs the date as yyyymmdd:
+	// dropping the four low digits (mmdd) leaves the year
+	TTimeStamp runStart(GetCurrentStartTime());
+	return runStart.GetDate() / 10000;
+}
+
+//______________________________________________________________________________________________
+const char* AliShuttle::GetLHCPeriod() const
+{
+	//
+	// Returns the "LHCperiod" run parameter of the current logbook entry,
+	// or 0 when no logbook entry is set
+	//
+
+	return fLogbookEntry ? fLogbookEntry->GetRunParameter("LHCperiod") : 0;
+}
+
//______________________________________________________________________________________________
void AliShuttle::Log(const char* detector, const char* message)
{
// Fill log string with a message
//
- void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
+ TString logRunDir = GetShuttleLogDir();
+ if (GetCurrentRun() >=0)
+ logRunDir += Form("/%d", GetCurrentRun());
+
+ void* dir = gSystem->OpenDirectory(logRunDir.Data());
if (dir == NULL) {
- if (gSystem->mkdir(GetShuttleLogDir(), kTRUE)) {
+ if (gSystem->mkdir(logRunDir.Data(), kTRUE)) {
AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
return;
}
TString fileName;
if (GetCurrentRun() >= 0)
- fileName.Form("%s/%s_%d.log", GetShuttleLogDir(), detector, GetCurrentRun());
- else
+ {
+ fileName.Form("%s/%d/%s_%d.log", GetShuttleLogDir(), GetCurrentRun(),
+ detector, GetCurrentRun());
+ } else {
fileName.Form("%s/%s.log", GetShuttleLogDir(), detector);
+ }
return fileName;
}
+//______________________________________________________________________________________________
+void AliShuttle::SendAlive()
+{
+	// Sends SHUTTLE_status = "Alive" to MonaLisa under the __PROCESSINGINFO__ id
+
+	TMonaLisaText aliveStatus("SHUTTLE_status", "Alive");
+
+	// SendParameters takes a list, even for a single parameter
+	TList parameters;
+	parameters.Add(&aliveStatus);
+
+	fMonaLisa->SendParameters(&parameters, "__PROCESSINGINFO__");
+}
+
//______________________________________________________________________________________________
Bool_t AliShuttle::Collect(Int_t run)
{
SetLastAction("Starting");
+ // create ML instance
+ if (!fMonaLisa)
+ fMonaLisa = new TMonaLisaWriter(fConfig->GetMonitorHost(), fConfig->GetMonitorTable());
+
+ SendAlive();
+ CountOpenRuns();
+
TString whereClause("where shuttle_done=0");
if (run != -1)
whereClause += Form(" and run=%d", run);
if (!RetrieveConditionsData(shuttleLogbookEntries))
{
Log("SHUTTLE", "Collect - Process of at least one run failed");
+ CountOpenRuns();
return kFALSE;
}
Log("SHUTTLE", "Collect - Requested run(s) successfully processed");
+ CountOpenRuns();
return kTRUE;
}
if (fTestMode != kNone)
return kTRUE;
+ TString to="";
+ TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
+ TObjString *anExpert=0;
+ while ((anExpert = (TObjString*) iterExperts.Next()))
+ {
+ to += Form("%s,", anExpert->GetName());
+ }
+ if (to.Length() > 0)
+ to.Remove(to.Length()-1);
+ AliDebug(2, Form("to: %s",to.Data()));
+
+ if (to.IsNull()) {
+ Log("SHUTTLE", "List of detector responsibles not yet set!");
+ return kFALSE;
+ }
+
void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
if (dir == NULL)
{
if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
{
- AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
+ Log("SHUTTLE", Form("SendMail - Can't open directory <%s>", GetShuttleLogDir()));
return kFALSE;
}
if (!mailBody.is_open())
{
- AliError(Form("Could not open mail body file %s", bodyFileName.Data()));
+ Log("SHUTTLE", Form("Could not open mail body file %s", bodyFileName.Data()));
return kFALSE;
}
- TString to="";
- TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
- TObjString *anExpert=0;
- while ((anExpert = (TObjString*) iterExperts.Next()))
+ TString cc="alberto.colla@cern.ch";
+
+ TString subject = Form("%s Shuttle preprocessor FAILED in run %d (run type = %s)!",
+ fCurrentDetector.Data(), GetCurrentRun(), GetRunType());
+ AliDebug(2, Form("subject: %s", subject.Data()));
+
+ TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
+ body += Form("SHUTTLE just detected that your preprocessor "
+ "failed processing run %d (run type = %s)!!\n\n",
+ GetCurrentRun(), GetRunType());
+ body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n",
+ fCurrentDetector.Data());
+ if (fConfig->GetRunMode() == AliShuttleConfig::kTest)
{
- to += Form("%s,", anExpert->GetName());
+ body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
+ } else {
+ body += Form("\thttp://pcalimonitor.cern.ch/shuttle.jsp?instance=PROD&time=168 \n\n");
+ }
+
+
+ TString logFolder = "logs";
+ if (fConfig->GetRunMode() == AliShuttleConfig::kProd)
+ logFolder += "_PROD";
+
+
+ body += Form("Find the %s log for the current run on \n\n"
+ "\thttp://pcalishuttle01.cern.ch:8880/%s/%d/%s_%d.log \n\n",
+ fCurrentDetector.Data(), logFolder.Data(), GetCurrentRun(),
+ fCurrentDetector.Data(), GetCurrentRun());
+ body += Form("The last 10 lines of %s log file are following:\n\n", fCurrentDetector.Data());
+
+ AliDebug(2, Form("Body begin: %s", body.Data()));
+
+ mailBody << body.Data();
+ mailBody.close();
+ mailBody.open(bodyFileName, ofstream::out | ofstream::app);
+
+ TString logFileName = Form("%s/%d/%s_%d.log", GetShuttleLogDir(),
+ GetCurrentRun(), fCurrentDetector.Data(), GetCurrentRun());
+ TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
+ if (gSystem->Exec(tailCommand.Data()))
+ {
+ mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
+ }
+
+ TString endBody = Form("------------------------------------------------------\n\n");
+ endBody += Form("In case of problems please contact the SHUTTLE core team.\n\n");
+ endBody += "Please do not answer this message directly, it is automatically generated.\n\n";
+ endBody += "Greetings,\n\n \t\t\tthe SHUTTLE\n";
+
+ AliDebug(2, Form("Body end: %s", endBody.Data()));
+
+ mailBody << endBody.Data();
+
+ mailBody.close();
+
+ // send mail!
+ TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
+ subject.Data(),
+ cc.Data(),
+ to.Data(),
+ bodyFileName.Data());
+ AliDebug(2, Form("mail command: %s", mailCommand.Data()));
+
+ Bool_t result = gSystem->Exec(mailCommand.Data());
+
+ return result == 0;
+}
+
+//______________________________________________________________________________________________
+Bool_t AliShuttle::SendMailToDCS()
+{
+ //
+ // sends a mail to the DCS experts in case of DCS error
+ //
+
+ if (fTestMode != kNone)
+ return kTRUE;
+
+ void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
+ if (dir == NULL)
+ {
+ if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
+ {
+ Log("SHUTTLE", Form("SendMailToDCS - Can't open directory <%s>", GetShuttleLogDir()));
+ return kFALSE;
+ }
+
+ } else {
+ gSystem->FreeDirectory(dir);
}
- to.Remove(to.Length()-1);
+
+ TString bodyFileName;
+ bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
+ gSystem->ExpandPathName(bodyFileName);
+
+ ofstream mailBody;
+ mailBody.open(bodyFileName, ofstream::out);
+
+ if (!mailBody.is_open())
+ {
+ Log("SHUTTLE", Form("SendMailToDCS - Could not open mail body file %s", bodyFileName.Data()));
+ return kFALSE;
+ }
+
+ TString to="Vladimir.Fekete@cern.ch, Svetozar.Kapusta@cern.ch";
+ //TString to="alberto.colla@cern.ch";
AliDebug(2, Form("to: %s",to.Data()));
if (to.IsNull()) {
TString cc="alberto.colla@cern.ch";
- TString subject = Form("%s Shuttle preprocessor FAILED in run %d !",
+ TString subject = Form("Retrieval of data points for %s FAILED in run %d !",
fCurrentDetector.Data(), GetCurrentRun());
AliDebug(2, Form("subject: %s", subject.Data()));
- TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
- body += Form("SHUTTLE just detected that your preprocessor "
- "failed processing run %d!!\n\n", GetCurrentRun());
- body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n", fCurrentDetector.Data());
- body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
+ TString body = Form("Dear DCS experts, \n\n");
+ body += Form("SHUTTLE couldn\'t retrieve the data points for detector %s "
+ "in run %d!!\n\n", fCurrentDetector.Data(), GetCurrentRun());
+ body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n",
+ fCurrentDetector.Data());
+ if (fConfig->GetRunMode() == AliShuttleConfig::kTest)
+ {
+ body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
+ } else {
+ body += Form("\thttp://pcalimonitor.cern.ch/shuttle.jsp?instance=PROD?time=168 \n\n");
+ }
+
+ TString logFolder = "logs";
+ if (fConfig->GetRunMode() == AliShuttleConfig::kProd)
+ logFolder += "_PROD";
+
+
body += Form("Find the %s log for the current run on \n\n"
- "\thttp://pcalishuttle01.cern.ch:8880/logs/%s_%d.log \n\n",
- fCurrentDetector.Data(), fCurrentDetector.Data(), GetCurrentRun());
- body += Form("The last 10 lines of %s log file are following:\n\n");
+ "\thttp://pcalishuttle01.cern.ch:8880/%s/%d/%s_%d.log \n\n",
+ fCurrentDetector.Data(), logFolder.Data(), GetCurrentRun(),
+ fCurrentDetector.Data(), GetCurrentRun());
+ body += Form("The last 10 lines of %s log file are following:\n\n", fCurrentDetector.Data());
AliDebug(2, Form("Body begin: %s", body.Data()));
mailBody.close();
mailBody.open(bodyFileName, ofstream::out | ofstream::app);
- TString logFileName = Form("%s/%s_%d.log", GetShuttleLogDir(), fCurrentDetector.Data(), GetCurrentRun());
+ TString logFileName = Form("%s/%d/%s_%d.log", GetShuttleLogDir(), GetCurrentRun(),
+ fCurrentDetector.Data(), GetCurrentRun());
TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
if (gSystem->Exec(tailCommand.Data()))
{