/*
$Log$
+Revision 1.75 2007/12/18 15:42:14 jgrosseo
+adding number of open runs to monitoring
+
+Revision 1.74 2007/12/17 03:23:32 jgrosseo
+several bugfixes
+added "empty preprocessor" as placeholder for Acorde in FDR
+
+Revision 1.73 2007/12/14 19:31:36 acolla
+Sending email to DCS experts is temporarily commented
+
+Revision 1.72 2007/12/13 15:44:28 acolla
+Run type added in mail sent to detector expert (eases understanding)
+
+Revision 1.71 2007/12/12 14:56:14 jgrosseo
+sending shuttle_ignore to ML also in case of 0 events
+
+Revision 1.70 2007/12/12 13:45:35 acolla
+Monalisa started in Collect() function. Alive message to monitor is sent at each Collect and every minute during preprocessor processing.
+
+Revision 1.69 2007/12/12 10:06:29 acolla
+in AliShuttle.cxx: SHUTTLE logbook is updated in case of invalid run times:
+
+time_start==0 && time_end==0
+
+logbook is NOT updated if time_start != 0 && time_end == 0, because it may mean that the run is still ongoing.
+
+Revision 1.68 2007/12/11 10:15:17 acolla
+Added marking SHUTTLE=DONE for invalid runs
+(invalid start time or end time) and runs with totalEvents < 1
+
+Revision 1.67 2007/12/07 19:14:36 acolla
+in AliShuttleTrigger:
+
+Added automatic collection of new runs on a regular time basis (settable from the configuration)
+
+in AliShuttleConfig: new members
+
+- triggerWait: time to wait for DIM trigger (s) before starting automatic collection of new runs
+- mode: run mode (test, prod) -> used to build log folder (logs or logs_PROD)
+
+in AliShuttle:
+
+- logs now stored in logs/#RUN/DET_#RUN.log
+
+Revision 1.66 2007/12/05 10:45:19 jgrosseo
+changed order of arguments to TMonaLisaWriter
+
+Revision 1.65 2007/11/26 16:58:37 acolla
+Monalisa configuration added: host and table name
+
Revision 1.64 2007/11/13 16:15:47 acolla
DCS map is stored in a file in the temp folder where the detector is processed.
If the preprocessor fails, the temp folder is not removed. This will help the debugging of the problem.
Log("SHUTTLE", Form("StoreOCDB - %s: object %s has validity infinite but "
"there are previous unprocessed runs!",
fCurrentDetector.Data(), aLocId.GetPath().Data()));
+ result = kFALSE;
continue;
}
TString localBaseFolder = sto->GetBaseFolder();
// Build Run level folder
- // folder = /alice/data/year/lhcPeriod/runNb/Raw
+ // folder = /alice/data/year/lhcPeriod/runNb/raw
TString lhcPeriod = GetLHCPeriod();
return 0;
}
- TString target = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/Raw/%s",
+ TString target = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/raw/%s",
localBaseFolder.Data(), GetCurrentYear(),
lhcPeriod.Data(), GetCurrentRun(), gridFileName);
return 0;
}
- dir = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/Raw",
+ dir = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/raw",
localBaseFolder.Data(), GetCurrentYear(),
lhcPeriod.Data(), GetCurrentRun());
alienDir = dir(dir.Index("/alice/data/"), dir.Length());
mlList.Add(&mlStatus);
mlList.Add(&mlRetryCount);
- fMonaLisa->SendParameters(&mlList);
+ TString mlID;
+ mlID.Form("%d", GetCurrentRun());
+ fMonaLisa->SendParameters(&mlList, mlID);
}
//______________________________________________________________________________________________
Log("SHUTTLE", Form("ContinueProcessing - %s: all objects "
"successfully stored into main storage",
fCurrentDetector.Data()));
- UpdateShuttleStatus(AliShuttleStatus::kDone);
- UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE");
} else {
Log("SHUTTLE",
Form("ContinueProcessing - %s: Grid storage failed again",
Log("SHUTTLE", Form("\t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^*",
GetCurrentRun()));
- // create ML instance that monitors this run
- fMonaLisa = new TMonaLisaWriter(Form("%d", GetCurrentRun()), fConfig->GetMonitorTable(),
- fConfig->GetMonitorHost());
- // disable monitoring of other parameters that come e.g. from TFile
- gMonitoringWriter = 0;
-
// Send the information to ML
TMonaLisaText mlStatus("SHUTTLE_status", "Processing");
TMonaLisaText mlRunType("SHUTTLE_runtype", Form("%s (%s)", entry->GetRunType(), entry->GetRunParameter("log")));
mlList.Add(&mlStatus);
mlList.Add(&mlRunType);
- fMonaLisa->SendParameters(&mlList);
+ TString mlID;
+ mlID.Form("%d", GetCurrentRun());
+ fMonaLisa->SendParameters(&mlList, mlID);
if (fLogbookEntry->IsDone())
{
TString lhcPeriod(GetLHCPeriod());
if (lhcPeriod.Length() == 0)
{
- Log("SHUTTLE","StoreRunMetaDataFile - LHCPeriod not found in logbook!");
- return 0;
+ Log("SHUTTLE","Process - LHCPeriod not found in logbook!");
+ return 0;
}
if (fgkMainCDB.Length() == 0)
fgkMainRefStorage = Form("alien://folder=/alice/data/%d/%s/Reference?user=alidaq?cacheFold=/tmp/OCDBCache",
GetCurrentYear(), lhcPeriod.Data());
- AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
- if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
- AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
- if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
-
// Loop on detectors in the configuration
TIter iter(fConfig->GetDetectors());
TObjString* aDetector = 0;
+ Bool_t first = kTRUE;
+
while ((aDetector = (TObjString*) iter.Next()))
{
fCurrentDetector = aDetector->String();
if (ContinueProcessing() == kFALSE) continue;
+
+ if (first)
+ {
+ // only read QueryCDB when needed and only once
+ AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
+ if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
+ AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
+ if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
+ first = kFALSE;
+ }
Log("SHUTTLE", Form("\t\t\t****** run %d - %s: START ******",
GetCurrentRun(), aDetector->GetName()));
}
if (expiredTime % 60 == 0)
+ {
Log("SHUTTLE", Form("Process - %s: Checking process. "
"Run time: %d seconds - Memory consumption: %d KB",
fCurrentDetector.Data(), expiredTime, mem));
+ SendAlive();
+ }
if (mem > fConfig->GetPPMaxMem())
{
if (success) // Preprocessor finished successfully!
{
// remove temporary folder
- gSystem->Exec(Form("rm -rf %s",tmpDir.Data()));
+ // temporary commented (JF)
+ //gSystem->Exec(Form("rm -rf %s",tmpDir.Data()));
// Update time_processed field in FXS DB
if (UpdateTable() == kFALSE)
TObjArray checkEntryArray;
checkEntryArray.SetOwner(1);
TString whereClause = Form("where run=%d", GetCurrentRun());
- if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || checkEntryArray.GetEntries() == 0) {
+ if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) ||
+ checkEntryArray.GetEntries() == 0) {
Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!",
GetCurrentRun()));
return hasError == kFALSE;
}
}
- // remove ML instance
- delete fMonaLisa;
- fMonaLisa = 0;
-
fLogbookEntry = 0;
return hasError == kFALSE;
" Sending mail to DCS experts!", host.Data()));
UpdateShuttleStatus(AliShuttleStatus::kDCSError);
- if (!SendMailToDCS())
- Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!"));
+ //if (!SendMailToDCS())
+ // Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!"));
delete dcsMap;
return kFALSE;
" Sending mail to DCS experts!", host.Data()));
UpdateShuttleStatus(AliShuttleStatus::kDCSError);
- if (!SendMailToDCS())
- Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!"));
+ //if (!SendMailToDCS())
+ // Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!"));
if (aliasMap) delete aliasMap;
delete dcsMap;
return kTRUE;
}
+//______________________________________________________________________________________________
+void AliShuttle::CountOpenRuns()
+{
+    //
+    // Counts the entries of the Shuttle logbook table that have shuttle_done=0
+    // and sends that number to ML as the "SHUTTLE_openruns" parameter
+    // (identifier "__PROCESSINGINFO__").
+    //
+    // On any failure (no connection, query error, unexpected result layout)
+    // an error is logged where applicable and nothing is sent.
+    //
+
+    // the logbook is reached through server slot 3; give up if it is not available
+    if (!Connect(3))
+        return;
+
+    TString sqlQuery;
+    sqlQuery = Form("select count(*) from %s where shuttle_done=0", fConfig->GetShuttlelbTable());
+
+    TSQLResult* aResult = fServer[3]->Query(sqlQuery);
+    if (!aResult) {
+        AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
+        return;
+    }
+
+    AliDebug(2,Form("Query = %s", sqlQuery.Data()));
+
+    // From here on the result set belongs to this function:
+    // every exit path below has to delete it, otherwise it is leaked.
+    if (aResult->GetRowCount() == 0) {
+        AliError(Form("No result for query %s received", sqlQuery.Data()));
+        delete aResult;
+        return;
+    }
+
+    if (aResult->GetFieldCount() != 1) {
+        AliError(Form("Invalid field count for query %s received", sqlQuery.Data()));
+        delete aResult;
+        return;
+    }
+
+    TSQLRow* aRow = aResult->Next();
+    if (!aRow) {
+        AliError(Form("Could not receive result of query %s", sqlQuery.Data()));
+        delete aResult;
+        return;
+    }
+
+    // the single field of the single row holds count(*) as text
+    TString result(aRow->GetField(0), aRow->GetFieldLength(0));
+    Int_t count = result.Atoi();
+
+    Log("SHUTTLE", Form("%d unprocessed runs", count));
+
+    delete aRow;
+    delete aResult;
+
+    // mlStatus is declared before mlList so that the list is destroyed first
+    TMonaLisaValue mlStatus("SHUTTLE_openruns", count);
+
+    TList mlList;
+    mlList.Add(&mlStatus);
+
+    fMonaLisa->SendParameters(&mlList, "__PROCESSINGINFO__");
+}
+
//______________________________________________________________________________________________
Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
TObjArray& entries)
entries.SetOwner(1);
// check connection, in case connect
- if(!Connect(3)) return kFALSE;
+ if (!Connect(3)) return kFALSE;
TString sqlQuery;
sqlQuery = Form("select * from %s %s order by run", fConfig->GetShuttlelbTable(), whereClause);
UInt_t startTime = entry->GetStartTime();
UInt_t endTime = entry->GetEndTime();
- if (!startTime || !endTime || startTime > endTime) {
+// if (!startTime || !endTime || startTime > endTime)
+// {
+// Log("SHUTTLE",
+// Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d. Skipping!",
+// run, startTime, endTime));
+//
+// Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
+// fLogbookEntry = entry;
+// if (!UpdateShuttleLogbook("shuttle_done"))
+// {
+// AliError(Form("Could not update logbook for run %d !", run));
+// }
+// fLogbookEntry = 0;
+//
+// delete entry;
+// delete aRow;
+// delete aResult;
+// return 0;
+// }
+
+ if (!startTime)
+ {
+ Log("SHUTTLE",
+ Form("QueryRunParameters - Invalid parameters for Run %d: "
+ "startTime = %d, endTime = %d. Skipping!",
+ run, startTime, endTime));
+
+ Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
+ fLogbookEntry = entry;
+ if (!UpdateShuttleLogbook("shuttle_ignored"))
+ {
+ AliError(Form("Could not update logbook for run %d !", run));
+ }
+ fLogbookEntry = 0;
+
+ delete entry;
+ delete aRow;
+ delete aResult;
+ return 0;
+ }
+
+ if (startTime && !endTime)
+ {
+ // TODO: we do not mark SHUTTLE done here, because a missing end time
+ // may mean that the run is still ongoing!
Log("SHUTTLE",
- Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d",
- run, startTime, endTime));
+ Form("QueryRunParameters - Invalid parameters for Run %d: "
+ "startTime = %d, endTime = %d. Skipping (Shuttle won't be marked as DONE)!",
+ run, startTime, endTime));
+
+ //Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
+ //fLogbookEntry = entry;
+ //if (!UpdateShuttleLogbook("shuttle_done"))
+ //{
+ // AliError(Form("Could not update logbook for run %d !", run));
+ //}
+ //fLogbookEntry = 0;
+
+ delete entry;
+ delete aRow;
+ delete aResult;
+ return 0;
+ }
+
+ if (startTime && endTime && (startTime > endTime))
+ {
+ Log("SHUTTLE",
+ Form("QueryRunParameters - Invalid parameters for Run %d: "
+ "startTime = %d, endTime = %d. Skipping!",
+ run, startTime, endTime));
+
+ Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
+ fLogbookEntry = entry;
+ if (!UpdateShuttleLogbook("shuttle_ignored"))
+ {
+ AliError(Form("Could not update logbook for run %d !", run));
+ }
+ fLogbookEntry = 0;
+
+ delete entry;
+ delete aRow;
+ delete aResult;
+ return 0;
+ }
+
+ TString totEventsStr = entry->GetRunParameter("totalEvents");
+ Int_t totEvents = totEventsStr.Atoi();
+ if (totEvents < 1)
+ {
+ Log("SHUTTLE",
+ Form("QueryRunParameters - Run %d has 0 events - Skipping!", run));
+
+ Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
+ fLogbookEntry = entry;
+ if (!UpdateShuttleLogbook("shuttle_ignored"))
+ {
+ AliError(Form("Could not update logbook for run %d !", run));
+ }
+ fLogbookEntry = 0;
+
delete entry;
delete aRow;
delete aResult;
TString detName(detector);
TString setClause;
- if(detName == "shuttle_done")
+ if (detName == "shuttle_done" || detName == "shuttle_ignored")
{
setClause = "set shuttle_done=1";
- // Send the information to ML
- TMonaLisaText mlStatus("SHUTTLE_status", "Done");
-
- TList mlList;
- mlList.Add(&mlStatus);
+ if (detName == "shuttle_done")
+ {
+ // Send the information to ML
+ TMonaLisaText mlStatus("SHUTTLE_status", "Done");
- fMonaLisa->SendParameters(&mlList);
+ TList mlList;
+ mlList.Add(&mlStatus);
+
+ TString mlID;
+ mlID.Form("%d", GetCurrentRun());
+ fMonaLisa->SendParameters(&mlList, mlID);
+ }
} else {
TString statusStr(status);
if(statusStr.Contains("done", TString::kIgnoreCase) ||
// Fill log string with a message
//
- void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
+ TString logRunDir = GetShuttleLogDir();
+ if (GetCurrentRun() >=0)
+ logRunDir += Form("/%d", GetCurrentRun());
+
+ void* dir = gSystem->OpenDirectory(logRunDir.Data());
if (dir == NULL) {
- if (gSystem->mkdir(GetShuttleLogDir(), kTRUE)) {
+ if (gSystem->mkdir(logRunDir.Data(), kTRUE)) {
AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
return;
}
TString fileName;
if (GetCurrentRun() >= 0)
- fileName.Form("%s/%s_%d.log", GetShuttleLogDir(), detector, GetCurrentRun());
- else
+ {
+ fileName.Form("%s/%d/%s_%d.log", GetShuttleLogDir(), GetCurrentRun(),
+ detector, GetCurrentRun());
+ } else {
fileName.Form("%s/%s.log", GetShuttleLogDir(), detector);
+ }
return fileName;
}
+//______________________________________________________________________________________________
+void AliShuttle::SendAlive()
+{
+    //
+    // Reports the status "Alive" to ML: a single text parameter
+    // "SHUTTLE_status" is sent under the identifier "__PROCESSINGINFO__".
+    //
+
+    const char* processingInfoId = "__PROCESSINGINFO__";
+
+    // the status object is declared before the list that refers to it,
+    // so the list goes out of scope first
+    TMonaLisaText aliveStatus("SHUTTLE_status", "Alive");
+
+    TList aliveParams;
+    aliveParams.Add(&aliveStatus);
+
+    fMonaLisa->SendParameters(&aliveParams, processingInfoId);
+}
+
//______________________________________________________________________________________________
Bool_t AliShuttle::Collect(Int_t run)
{
SetLastAction("Starting");
+ // create ML instance
+ if (!fMonaLisa)
+ fMonaLisa = new TMonaLisaWriter(fConfig->GetMonitorHost(), fConfig->GetMonitorTable());
+
+ SendAlive();
+ CountOpenRuns();
+
TString whereClause("where shuttle_done=0");
if (run != -1)
whereClause += Form(" and run=%d", run);
if (fTestMode != kNone)
return kTRUE;
+ TString to="";
+ TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
+ TObjString *anExpert=0;
+ while ((anExpert = (TObjString*) iterExperts.Next()))
+ {
+ to += Form("%s,", anExpert->GetName());
+ }
+ if (to.Length() > 0)
+ to.Remove(to.Length()-1);
+ AliDebug(2, Form("to: %s",to.Data()));
+
+ if (to.IsNull()) {
+ Log("SHUTTLE", "List of detector responsibles not yet set!");
+ return kFALSE;
+ }
+
void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
if (dir == NULL)
{
return kFALSE;
}
- TString to="";
- TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
- TObjString *anExpert=0;
- while ((anExpert = (TObjString*) iterExperts.Next()))
- {
- to += Form("%s,", anExpert->GetName());
- }
- to.Remove(to.Length()-1);
- AliDebug(2, Form("to: %s",to.Data()));
-
- if (to.IsNull()) {
- Log("SHUTTLE", "List of detector responsibles not yet set!");
- return kFALSE;
- }
-
TString cc="alberto.colla@cern.ch";
- TString subject = Form("%s Shuttle preprocessor FAILED in run %d !",
- fCurrentDetector.Data(), GetCurrentRun());
+ TString subject = Form("%s Shuttle preprocessor FAILED in run %d (run type = %s)!",
+ fCurrentDetector.Data(), GetCurrentRun(), GetRunType());
AliDebug(2, Form("subject: %s", subject.Data()));
TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
body += Form("SHUTTLE just detected that your preprocessor "
- "failed processing run %d!!\n\n", GetCurrentRun());
- body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n", fCurrentDetector.Data());
- body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
+ "failed processing run %d (run type = %s)!!\n\n",
+ GetCurrentRun(), GetRunType());
+ body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n",
+ fCurrentDetector.Data());
+ if (fConfig->GetRunMode() == AliShuttleConfig::kTest)
+ {
+ body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
+ } else {
+ body += Form("\thttp://pcalimonitor.cern.ch/shuttle.jsp?instance=PROD&time=168 \n\n");
+ }
+
+
+ TString logFolder = "logs";
+ if (fConfig->GetRunMode() == AliShuttleConfig::kProd)
+ logFolder += "_PROD";
+
+
body += Form("Find the %s log for the current run on \n\n"
- "\thttp://pcalishuttle01.cern.ch:8880/logs/%s_%d.log \n\n",
- fCurrentDetector.Data(), fCurrentDetector.Data(), GetCurrentRun());
- body += Form("The last 10 lines of %s log file are following:\n\n");
+ "\thttp://pcalishuttle01.cern.ch:8880/%s/%d/%s_%d.log \n\n",
+ fCurrentDetector.Data(), logFolder.Data(), GetCurrentRun(),
+ fCurrentDetector.Data(), GetCurrentRun());
+ body += Form("The last 10 lines of %s log file are following:\n\n", fCurrentDetector.Data());
AliDebug(2, Form("Body begin: %s", body.Data()));
mailBody.close();
mailBody.open(bodyFileName, ofstream::out | ofstream::app);
- TString logFileName = Form("%s/%s_%d.log", GetShuttleLogDir(), fCurrentDetector.Data(), GetCurrentRun());
+ TString logFileName = Form("%s/%d/%s_%d.log", GetShuttleLogDir(),
+ GetCurrentRun(), fCurrentDetector.Data(), GetCurrentRun());
TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
if (gSystem->Exec(tailCommand.Data()))
{
TString body = Form("Dear DCS experts, \n\n");
body += Form("SHUTTLE couldn\'t retrieve the data points for detector %s "
"in run %d!!\n\n", fCurrentDetector.Data(), GetCurrentRun());
- body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n", fCurrentDetector.Data());
- body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
+ body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n",
+ fCurrentDetector.Data());
+ // Link to the monitoring page matching the run mode of this instance.
+ // Query parameters are separated by '&' (only the first one is introduced
+ // by '?'), consistent with the link written into the detector-expert mail.
+ if (fConfig->GetRunMode() == AliShuttleConfig::kTest)
+ {
+ body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
+ } else {
+ body += Form("\thttp://pcalimonitor.cern.ch/shuttle.jsp?instance=PROD&time=168 \n\n");
+ }
+
+ TString logFolder = "logs";
+ if (fConfig->GetRunMode() == AliShuttleConfig::kProd)
+ logFolder += "_PROD";
+
+
body += Form("Find the %s log for the current run on \n\n"
- "\thttp://pcalishuttle01.cern.ch:8880/logs/%s_%d.log \n\n",
- fCurrentDetector.Data(), fCurrentDetector.Data(), GetCurrentRun());
- body += Form("The last 10 lines of %s log file are following:\n\n");
+ "\thttp://pcalishuttle01.cern.ch:8880/%s/%d/%s_%d.log \n\n",
+ fCurrentDetector.Data(), logFolder.Data(), GetCurrentRun(),
+ fCurrentDetector.Data(), GetCurrentRun());
+ body += Form("The last 10 lines of %s log file are following:\n\n", fCurrentDetector.Data());
AliDebug(2, Form("Body begin: %s", body.Data()));
mailBody.close();
mailBody.open(bodyFileName, ofstream::out | ofstream::app);
- TString logFileName = Form("%s/%s_%d.log", GetShuttleLogDir(), fCurrentDetector.Data(), GetCurrentRun());
+ TString logFileName = Form("%s/%d/%s_%d.log", GetShuttleLogDir(), GetCurrentRun(),
+ fCurrentDetector.Data(), GetCurrentRun());
TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
if (gSystem->Exec(tailCommand.Data()))
{