/*
$Log$
+Revision 1.73 2007/12/14 19:31:36 acolla
+Sending email to DCS experts is temporarily commented out
+
+Revision 1.72 2007/12/13 15:44:28 acolla
+Run type added in mail sent to detector expert (eases understanding)
+
+Revision 1.71 2007/12/12 14:56:14 jgrosseo
+sending shuttle_ignore to ML also in case of 0 events
+
+Revision 1.70 2007/12/12 13:45:35 acolla
+Monalisa started in Collect() function. Alive message to monitor is sent at each Collect and every minute during preprocessor processing.
+
+Revision 1.69 2007/12/12 10:06:29 acolla
+in AliShuttle.cxx: SHUTTLE logbook is updated in case of invalid run times:
+
+time_start==0 && time_end==0
+
+logbook is NOT updated if time_start != 0 && time_end == 0, because it may mean that the run is still ongoing.
+
+Revision 1.68 2007/12/11 10:15:17 acolla
+Added marking SHUTTLE=DONE for invalid runs
+(invalid start time or end time) and runs with totalEvents < 1
+
+Revision 1.67 2007/12/07 19:14:36 acolla
+in AliShuttleTrigger:
+
+Added automatic collection of new runs on a regular time basis (settable from the configuration)
+
+in AliShuttleConfig: new members
+
+- triggerWait: time to wait for DIM trigger (s) before starting automatic collection of new runs
+- mode: run mode (test, prod) -> used to build log folder (logs or logs_PROD)
+
+in AliShuttle:
+
+- logs now stored in logs/#RUN/DET_#RUN.log
+
+Revision 1.66 2007/12/05 10:45:19 jgrosseo
+changed order of arguments to TMonaLisaWriter
+
+Revision 1.65 2007/11/26 16:58:37 acolla
+Monalisa configuration added: host and table name
+
+Revision 1.64 2007/11/13 16:15:47 acolla
+DCS map is stored in a file in the temp folder where the detector is processed.
+If the preprocessor fails, the temp folder is not removed. This will help the debugging of the problem.
+
+Revision 1.63 2007/11/02 10:53:16 acolla
+Protection added to AliShuttle::CopyFileLocally
+
+Revision 1.62 2007/10/31 18:23:13 acolla
+Further development on the Shuttle:
+
+- Shuttle now connects to the Grid as alidaq. The OCDB and Reference folders
+are now built from /alice/data, e.g.:
+/alice/data/2007/LHC07a/OCDB
+
+the year and LHC period are taken from the Shuttle.
+Raw metadata files are stored by GRP to:
+/alice/data/2007/LHC07a/<runNb>/Raw/RunMetadata.root
+
+- Shuttle sends a mail to DCS experts each time DP retrieval fails.
+
Revision 1.61 2007/10/30 20:33:51 acolla
Improved managing of temporary folders, which weren't correctly handled.
Resolved bug introduced in StoreReferenceFile, which caused SPD preprocessor fail.
Log("SHUTTLE", Form("StoreOCDB - %s: object %s has validity infinite but "
"there are previous unprocessed runs!",
fCurrentDetector.Data(), aLocId.GetPath().Data()));
+ result = kFALSE;
continue;
}
gSystem->FreeDirectory(dir);
}
- Int_t result = gSystem->GetPathInfo(localFile, 0, (Long64_t*) 0, 0, 0);
+ Int_t result = 0;
+
+ result = gSystem->GetPathInfo(localFile, 0, (Long64_t*) 0, 0, 0);
if (result)
{
Log("SHUTTLE", Form("StoreFileLocally - %s does not exist", localFile));
return kFALSE;
}
+ result = gSystem->GetPathInfo(target, 0, (Long64_t*) 0, 0, 0);
+ if (!result)
+ {
+ Log("SHUTTLE", Form("StoreFileLocally - target file %s already exist, removing...", target.Data()));
+ if (gSystem->Unlink(target.Data()))
+ {
+ Log("SHUTTLE", Form("StoreFileLocally - Could not remove existing target file %s!", target.Data()));
+ return kFALSE;
+ }
+ }
+
result = gSystem->CopyFile(localFile, target);
if (result == 0)
mlList.Add(&mlStatus);
mlList.Add(&mlRetryCount);
- fMonaLisa->SendParameters(&mlList);
+ TString mlID;
+ mlID.Form("%d", GetCurrentRun());
+ fMonaLisa->SendParameters(&mlList, mlID);
}
//______________________________________________________________________________________________
Log("SHUTTLE", Form("ContinueProcessing - %s: all objects "
"successfully stored into main storage",
fCurrentDetector.Data()));
- UpdateShuttleStatus(AliShuttleStatus::kDone);
- UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE");
} else {
Log("SHUTTLE",
Form("ContinueProcessing - %s: Grid storage failed again",
Log("SHUTTLE", Form("\t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^*",
GetCurrentRun()));
- // create ML instance that monitors this run
- fMonaLisa = new TMonaLisaWriter(Form("%d", GetCurrentRun()), "SHUTTLE", "aliendb1.cern.ch");
- // disable monitoring of other parameters that come e.g. from TFile
- gMonitoringWriter = 0;
-
// Send the information to ML
TMonaLisaText mlStatus("SHUTTLE_status", "Processing");
TMonaLisaText mlRunType("SHUTTLE_runtype", Form("%s (%s)", entry->GetRunType(), entry->GetRunParameter("log")));
mlList.Add(&mlStatus);
mlList.Add(&mlRunType);
- fMonaLisa->SendParameters(&mlList);
+ TString mlID;
+ mlID.Form("%d", GetCurrentRun());
+ fMonaLisa->SendParameters(&mlList, mlID);
if (fLogbookEntry->IsDone())
{
TString lhcPeriod(GetLHCPeriod());
if (lhcPeriod.Length() == 0)
{
- Log("SHUTTLE","StoreRunMetaDataFile - LHCPeriod not found in logbook!");
- return 0;
+ Log("SHUTTLE","Process - LHCPeriod not found in logbook!");
+ return 0;
}
if (fgkMainCDB.Length() == 0)
fgkMainRefStorage = Form("alien://folder=/alice/data/%d/%s/Reference?user=alidaq?cacheFold=/tmp/OCDBCache",
GetCurrentYear(), lhcPeriod.Data());
- AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
- if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
- AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
- if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
-
// Loop on detectors in the configuration
TIter iter(fConfig->GetDetectors());
TObjString* aDetector = 0;
+ Bool_t first = kTRUE;
+
while ((aDetector = (TObjString*) iter.Next()))
{
fCurrentDetector = aDetector->String();
if (ContinueProcessing() == kFALSE) continue;
+
+ if (first)
+ {
+ // only read QueryCDB when needed and only once
+ AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
+ if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
+ AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
+ if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
+ first = kFALSE;
+ }
Log("SHUTTLE", Form("\t\t\t****** run %d - %s: START ******",
GetCurrentRun(), aDetector->GetName()));
}
if (expiredTime % 60 == 0)
+ {
Log("SHUTTLE", Form("Process - %s: Checking process. "
"Run time: %d seconds - Memory consumption: %d KB",
fCurrentDetector.Data(), expiredTime, mem));
+ SendAlive();
+ }
if (mem > fConfig->GetPPMaxMem())
{
Bool_t success = ProcessCurrentDetector();
gSystem->ChangeDirectory(wd.Data());
-
- gSystem->Exec(Form("rm -rf %s",tmpDir.Data()));
-
+
if (success) // Preprocessor finished successfully!
{
+ // remove temporary folder
+ gSystem->Exec(Form("rm -rf %s",tmpDir.Data()));
+
// Update time_processed field in FXS DB
if (UpdateTable() == kFALSE)
Log("SHUTTLE", Form("Process - %s: Could not update FXS databases!",
TObjArray checkEntryArray;
checkEntryArray.SetOwner(1);
TString whereClause = Form("where run=%d", GetCurrentRun());
- if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || checkEntryArray.GetEntries() == 0) {
+ if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) ||
+ checkEntryArray.GetEntries() == 0) {
Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!",
GetCurrentRun()));
return hasError == kFALSE;
}
}
- // remove ML instance
- delete fMonaLisa;
- fMonaLisa = 0;
-
fLogbookEntry = 0;
return hasError == kFALSE;
" Sending mail to DCS experts!", host.Data()));
UpdateShuttleStatus(AliShuttleStatus::kDCSError);
- if (!SendMailToDCS())
- Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!"));
+ //if (!SendMailToDCS())
+ // Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!"));
delete dcsMap;
return kFALSE;
" Sending mail to DCS experts!", host.Data()));
UpdateShuttleStatus(AliShuttleStatus::kDCSError);
- if (!SendMailToDCS())
- Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!"));
+ //if (!SendMailToDCS())
+ // Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!"));
if (aliasMap) delete aliasMap;
delete dcsMap;
}
}
+ // save map into file, to help debugging in case of preprocessor error
+ /*TFile* f = TFile::Open("DCSMap.root","recreate");
+ f->cd();
+ dcsMap->Write("DCSMap", TObject::kSingleKey);
+ f->Close();
+ delete f;*/
+
// DCS Archive DB processing successful. Call Preprocessor!
UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
UInt_t startTime = entry->GetStartTime();
UInt_t endTime = entry->GetEndTime();
- if (!startTime || !endTime || startTime > endTime) {
+// if (!startTime || !endTime || startTime > endTime)
+// {
+// Log("SHUTTLE",
+// Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d. Skipping!",
+// run, startTime, endTime));
+//
+// Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
+// fLogbookEntry = entry;
+// if (!UpdateShuttleLogbook("shuttle_done"))
+// {
+// AliError(Form("Could not update logbook for run %d !", run));
+// }
+// fLogbookEntry = 0;
+//
+// delete entry;
+// delete aRow;
+// delete aResult;
+// return 0;
+// }
+
+ if (!startTime)
+ {
+ Log("SHUTTLE",
+ Form("QueryRunParameters - Invalid parameters for Run %d: "
+ "startTime = %d, endTime = %d. Skipping!",
+ run, startTime, endTime));
+
+ Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
+ fLogbookEntry = entry;
+ if (!UpdateShuttleLogbook("shuttle_ignored"))
+ {
+ AliError(Form("Could not update logbook for run %d !", run));
+ }
+ fLogbookEntry = 0;
+
+ delete entry;
+ delete aRow;
+ delete aResult;
+ return 0;
+ }
+
+ if (startTime && !endTime)
+ {
+ // TODO Here we don't mark SHUTTLE done, because this may mean
+ //the run is still ongoing!!
+ Log("SHUTTLE",
+ Form("QueryRunParameters - Invalid parameters for Run %d: "
+ "startTime = %d, endTime = %d. Skipping (Shuttle won't be marked as DONE)!",
+ run, startTime, endTime));
+
+ //Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
+ //fLogbookEntry = entry;
+ //if (!UpdateShuttleLogbook("shuttle_done"))
+ //{
+ // AliError(Form("Could not update logbook for run %d !", run));
+ //}
+ //fLogbookEntry = 0;
+
+ delete entry;
+ delete aRow;
+ delete aResult;
+ return 0;
+ }
+
+ if (startTime && endTime && (startTime > endTime))
+ {
+ Log("SHUTTLE",
+ Form("QueryRunParameters - Invalid parameters for Run %d: "
+ "startTime = %d, endTime = %d. Skipping!",
+ run, startTime, endTime));
+
+ Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
+ fLogbookEntry = entry;
+ if (!UpdateShuttleLogbook("shuttle_ignored"))
+ {
+ AliError(Form("Could not update logbook for run %d !", run));
+ }
+ fLogbookEntry = 0;
+
+ delete entry;
+ delete aRow;
+ delete aResult;
+ return 0;
+ }
+
+ TString totEventsStr = entry->GetRunParameter("totalEvents");
+ Int_t totEvents = totEventsStr.Atoi();
+ if (totEvents < 1)
+ {
Log("SHUTTLE",
- Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d",
- run, startTime, endTime));
+ Form("QueryRunParameters - Run %d has 0 events - Skipping!", run));
+
+ Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
+ fLogbookEntry = entry;
+ if (!UpdateShuttleLogbook("shuttle_ignored"))
+ {
+ AliError(Form("Could not update logbook for run %d !", run));
+ }
+ fLogbookEntry = 0;
+
delete entry;
delete aRow;
delete aResult;
TString detName(detector);
TString setClause;
- if(detName == "shuttle_done")
+ if (detName == "shuttle_done" || detName == "shuttle_ignored")
{
setClause = "set shuttle_done=1";
- // Send the information to ML
- TMonaLisaText mlStatus("SHUTTLE_status", "Done");
-
- TList mlList;
- mlList.Add(&mlStatus);
+ if (detName == "shuttle_done")
+ {
+ // Send the information to ML
+ TMonaLisaText mlStatus("SHUTTLE_status", "Done");
- fMonaLisa->SendParameters(&mlList);
+ TList mlList;
+ mlList.Add(&mlStatus);
+
+ TString mlID;
+ mlID.Form("%d", GetCurrentRun());
+ fMonaLisa->SendParameters(&mlList, mlID);
+ }
} else {
TString statusStr(status);
if(statusStr.Contains("done", TString::kIgnoreCase) ||
// Fill log string with a message
//
- void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
+ TString logRunDir = GetShuttleLogDir();
+ if (GetCurrentRun() >=0)
+ logRunDir += Form("/%d", GetCurrentRun());
+
+ void* dir = gSystem->OpenDirectory(logRunDir.Data());
if (dir == NULL) {
- if (gSystem->mkdir(GetShuttleLogDir(), kTRUE)) {
+ if (gSystem->mkdir(logRunDir.Data(), kTRUE)) {
AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
return;
}
TString fileName;
if (GetCurrentRun() >= 0)
- fileName.Form("%s/%s_%d.log", GetShuttleLogDir(), detector, GetCurrentRun());
- else
+ {
+ fileName.Form("%s/%d/%s_%d.log", GetShuttleLogDir(), GetCurrentRun(),
+ detector, GetCurrentRun());
+ } else {
fileName.Form("%s/%s.log", GetShuttleLogDir(), detector);
+ }
return fileName;
}
+//______________________________________________________________________________________________
+void AliShuttle::SendAlive()
+{
+ // sends alive message to ML
+ //
+ // Builds a one-entry parameter list (SHUTTLE_status = "Alive") and hands it
+ // to fMonaLisa->SendParameters() under the fixed identifier
+ // "__PROCESSINGINFO__" instead of a run-number identifier.
+ // Takes no arguments and returns nothing.
+ //
+ // NOTE(review): fMonaLisa is dereferenced without a null check. Collect()
+ // creates it before calling this function; confirm that every other
+ // caller also runs after Collect() has set it up.
+
+ TMonaLisaText mlStatus("SHUTTLE_status", "Alive");
+
+ // The list stores the address of the local mlStatus object, so both must
+ // stay in scope until SendParameters() has been called.
+ TList mlList;
+ mlList.Add(&mlStatus);
+
+ fMonaLisa->SendParameters(&mlList, "__PROCESSINGINFO__");
+}
+
//______________________________________________________________________________________________
Bool_t AliShuttle::Collect(Int_t run)
{
SetLastAction("Starting");
+ // create ML instance
+ if (!fMonaLisa)
+ fMonaLisa = new TMonaLisaWriter(fConfig->GetMonitorHost(), fConfig->GetMonitorTable());
+
+
+ SendAlive();
+
TString whereClause("where shuttle_done=0");
if (run != -1)
whereClause += Form(" and run=%d", run);
gSystem->FreeDirectory(dir);
}
- TString bodyFileName;
- bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
- gSystem->ExpandPathName(bodyFileName);
-
- ofstream mailBody;
- mailBody.open(bodyFileName, ofstream::out);
-
- if (!mailBody.is_open())
- {
- Log("SHUTTLE", Form("Could not open mail body file %s", bodyFileName.Data()));
- return kFALSE;
- }
-
TString to="";
TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
TObjString *anExpert=0;
{
to += Form("%s,", anExpert->GetName());
}
- to.Remove(to.Length()-1);
+ if (to.Length() > 0)
+ to.Remove(to.Length()-1);
AliDebug(2, Form("to: %s",to.Data()));
if (to.IsNull()) {
return kFALSE;
}
+ TString bodyFileName;
+ bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
+ gSystem->ExpandPathName(bodyFileName);
+
+ ofstream mailBody;
+ mailBody.open(bodyFileName, ofstream::out);
+
+ if (!mailBody.is_open())
+ {
+ Log("SHUTTLE", Form("Could not open mail body file %s", bodyFileName.Data()));
+ return kFALSE;
+ }
+
TString cc="alberto.colla@cern.ch";
- TString subject = Form("%s Shuttle preprocessor FAILED in run %d !",
- fCurrentDetector.Data(), GetCurrentRun());
+ TString subject = Form("%s Shuttle preprocessor FAILED in run %d (run type = %s)!",
+ fCurrentDetector.Data(), GetCurrentRun(), GetRunType());
AliDebug(2, Form("subject: %s", subject.Data()));
TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
body += Form("SHUTTLE just detected that your preprocessor "
- "failed processing run %d!!\n\n", GetCurrentRun());
- body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n", fCurrentDetector.Data());
- body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
+ "failed processing run %d (run type = %s)!!\n\n",
+ GetCurrentRun(), GetRunType());
+ body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n",
+ fCurrentDetector.Data());
+ if (fConfig->GetRunMode() == AliShuttleConfig::kTest)
+ {
+ body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
+ } else {
+ body += Form("\thttp://pcalimonitor.cern.ch/shuttle.jsp?instance=PROD&time=168 \n\n");
+ }
+
+
+ TString logFolder = "logs";
+ if (fConfig->GetRunMode() == AliShuttleConfig::kProd)
+ logFolder += "_PROD";
+
+
body += Form("Find the %s log for the current run on \n\n"
- "\thttp://pcalishuttle01.cern.ch:8880/logs/%s_%d.log \n\n",
- fCurrentDetector.Data(), fCurrentDetector.Data(), GetCurrentRun());
- body += Form("The last 10 lines of %s log file are following:\n\n");
+ "\thttp://pcalishuttle01.cern.ch:8880/%s/%d/%s_%d.log \n\n",
+ fCurrentDetector.Data(), logFolder.Data(), GetCurrentRun(),
+ fCurrentDetector.Data(), GetCurrentRun());
+ body += Form("The last 10 lines of %s log file are following:\n\n", fCurrentDetector.Data());
AliDebug(2, Form("Body begin: %s", body.Data()));
mailBody.close();
mailBody.open(bodyFileName, ofstream::out | ofstream::app);
- TString logFileName = Form("%s/%s_%d.log", GetShuttleLogDir(), fCurrentDetector.Data(), GetCurrentRun());
+ TString logFileName = Form("%s/%d/%s_%d.log", GetShuttleLogDir(),
+ GetCurrentRun(), fCurrentDetector.Data(), GetCurrentRun());
TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
if (gSystem->Exec(tailCommand.Data()))
{
TString body = Form("Dear DCS experts, \n\n");
body += Form("SHUTTLE couldn\'t retrieve the data points for detector %s "
"in run %d!!\n\n", fCurrentDetector.Data(), GetCurrentRun());
- body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n", fCurrentDetector.Data());
- body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
+ body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n",
+ fCurrentDetector.Data());
+ if (fConfig->GetRunMode() == AliShuttleConfig::kTest)
+ {
+ body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
+ } else {
+ body += Form("\thttp://pcalimonitor.cern.ch/shuttle.jsp?instance=PROD?time=168 \n\n");
+ }
+
+ TString logFolder = "logs";
+ if (fConfig->GetRunMode() == AliShuttleConfig::kProd)
+ logFolder += "_PROD";
+
+
body += Form("Find the %s log for the current run on \n\n"
- "\thttp://pcalishuttle01.cern.ch:8880/logs/%s_%d.log \n\n",
- fCurrentDetector.Data(), fCurrentDetector.Data(), GetCurrentRun());
- body += Form("The last 10 lines of %s log file are following:\n\n");
+ "\thttp://pcalishuttle01.cern.ch:8880/%s/%d/%s_%d.log \n\n",
+ fCurrentDetector.Data(), logFolder.Data(), GetCurrentRun(),
+ fCurrentDetector.Data(), GetCurrentRun());
+ body += Form("The last 10 lines of %s log file are following:\n\n", fCurrentDetector.Data());
AliDebug(2, Form("Body begin: %s", body.Data()));
mailBody.close();
mailBody.open(bodyFileName, ofstream::out | ofstream::app);
- TString logFileName = Form("%s/%s_%d.log", GetShuttleLogDir(), fCurrentDetector.Data(), GetCurrentRun());
+ TString logFileName = Form("%s/%d/%s_%d.log", GetShuttleLogDir(), GetCurrentRun(),
+ fCurrentDetector.Data(), GetCurrentRun());
TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
if (gSystem->Exec(tailCommand.Data()))
{