#include <TFile.h>
#include <TGrid.h>
#include <TGridResult.h>
+#include <TMap.h>
#include <TMonaLisaWriter.h>
fLogbookEntry(0),
fCurrentDetector(),
fFirstProcessing(0),
-fFXSError(kFALSE),
+fFXSError(-1),
fStatusEntry(0),
fMonitoringMutex(0),
fLastActionTime(0),
//
// returns 0 if fail, 1 otherwise
+
if (fTestMode & kErrorStorage)
{
Log(fCurrentDetector, "StoreLocally - In TESTMODE - Simulating error while storing locally");
if (!(AliCDBManager::Instance()->GetStorage(localUri))) {
Log("SHUTTLE", Form("StoreLocally - Cannot activate local %s storage", cdbType));
} else {
+ Int_t logLevel = AliLog::GetGlobalLogLevel();
+ AliLog::SetGlobalLogLevel(AliLog::kError);
result = AliCDBManager::Instance()->GetStorage(localUri)
->Put(object, id, metaData);
+ AliLog::SetGlobalLogLevel((AliLog::EType_t)logLevel);
}
if(!result) {
Log(fCurrentDetector, Form("StoreLocally - Can't store object <%s>!", id.ToString().Data()));
}
+
return result;
}
//
UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
-
+
if (fTestMode & kErrorGrid)
{
Log("SHUTTLE", "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
Bool_t resultMetadata = kTRUE;
if(fCurrentDetector == "GRP")
{
- Log("StoreOCDB - SHUTTLE","Storing Run Metadata file ...");
+ Log("SHUTTLE","StoreOCDB - Storing Run Metadata file ...");
resultMetadata = CopyFilesToGrid("metadata");
}
AliCDBId aLocId = aLocEntry->GetId();
aLocEntry->SetVersion(-1);
aLocEntry->SetSubVersion(-1);
+
+ Log(fCurrentDetector.Data(), Form("Attempting to store %s", aLocId.ToString().Data()));
// If local object is valid up to infinity we store it only if it is
// the first unprocessed run!
Bool_t store = kTRUE;
TIter gridIter(gridIds);
AliCDBId* aGridId = 0;
- while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
- if(aGridId->GetPath() != aLocId.GetPath()) continue;
+ while ((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))) {
+ if (aGridId->GetPath() != aLocId.GetPath())
+ continue;
// skip all objects valid up to infinity
- if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
+ if (aGridId->GetLastRun() == AliCDBRunRange::Infinity())
+ continue;
+
// if we get here, it means there's already some more recent object stored on Grid!
+ Log(fCurrentDetector.Data(),
+ Form("StoreOCDB - A more recent object already exists in %s storage: <%s>",
+ type, aGridId->ToString().Data()));
+
store = kFALSE;
break;
}
- // If we get here, the file can be stored!
- Bool_t storeOk = gridSto->Put(aLocEntry);
- if(!store || storeOk){
-
- if (!store)
- {
- Log(fCurrentDetector.Data(),
- Form("StoreOCDB - A more recent object already exists in %s storage: <%s>",
- type, aGridId->ToString().Data()));
- } else {
+ Bool_t storeOk = kFALSE;
+ if (store)
+ {
+ Log(fCurrentDetector.Data(), Form("Prechecks succeeded. Ready to store %s", aLocId.ToString().Data()));
+ storeOk = gridSto->Put(aLocEntry);
+ if (storeOk) {
Log("SHUTTLE",
- Form("StoreOCDB - Object <%s> successfully put into %s storage",
- aLocId.ToString().Data(), type));
+ Form("StoreOCDB - Object <%s> successfully put into %s storage",
+ aLocId.ToString().Data(), type));
Log(fCurrentDetector.Data(),
Form("StoreOCDB - Object <%s> successfully put into %s storage",
- aLocId.ToString().Data(), type));
+ aLocId.ToString().Data(), type));
+ } else {
+ Log("SHUTTLE",
+ Form("StoreOCDB - Grid %s storage of object <%s> failed",
+ type, aLocId.ToString().Data()));
+ Log(fCurrentDetector.Data(),
+ Form("StoreOCDB - Grid %s storage of object <%s> failed",
+ type, aLocId.ToString().Data()));
+ result = kFALSE;
}
-
- // removing local filename...
+ }
+
+ if (!store || storeOk) {
+ // removing local file...
TString filename;
localSto->IdToFilename(aLocId, filename);
Log("SHUTTLE", Form("StoreOCDB - Removing local file %s", filename.Data()));
RemoveFile(filename.Data());
- continue;
- } else {
- Log("SHUTTLE",
- Form("StoreOCDB - Grid %s storage of object <%s> failed",
- type, aLocId.ToString().Data()));
- Log(fCurrentDetector.Data(),
- Form("StoreOCDB - Grid %s storage of object <%s> failed",
- type, aLocId.ToString().Data()));
- result = kFALSE;
}
}
localEntries->Clear();
lhcPeriod.Data()));
}
- TString target = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/raw/%s",
- localBaseFolder.Data(), GetCurrentYear(),
+ TString target = Form("%s/GRP/RunMetadata%s%d/%s/%09d/raw/%s",
+ localBaseFolder.Data(), fConfig->GetAlienPath(), GetCurrentYear(),
lhcPeriod.Data(), GetCurrentRun(), gridFileName);
return CopyFileLocally(localFile, target);
lhcPeriod.Append(Form("_%s", partition.Data()));
}
- dir = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/raw",
- localBaseFolder.Data(), GetCurrentYear(),
+ dir = Form("%s/GRP/RunMetadata%s%d/%s/%09d/raw",
+ localBaseFolder.Data(), fConfig->GetAlienPath(), GetCurrentYear(),
lhcPeriod.Data(), GetCurrentRun());
- alienDir = dir(dir.Index("/alice/data/"), dir.Length());
+ alienDir = dir(dir.Index(fConfig->GetAlienPath()), dir.Length());
begin = "";
}
//
TString offDetStr(GetOfflineDetName(detector));
- TString dir;
+ static TString dir;
if (offDetStr == "ITS" || offDetStr == "MUON" || offDetStr == "PHOS")
{
dir.Form("%s/%s/%s", base, offDetStr.Data(), detector);
}
return dir.Data();
-
-
}
//______________________________________________________________________________________________
fStatusEntry = 0;
}
+ Int_t path1 = GetCurrentRun()/10000;
fStatusEntry = AliCDBManager::Instance()->GetStorage(GetLocalCDB())
- ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
+ ->Get(Form("/SHUTTLE/%s/%d", fCurrentDetector.Data(), path1), GetCurrentRun());
if (!fStatusEntry) return 0;
fStatusEntry->SetOwner(1);
}
Int_t run = GetCurrentRun();
+ Int_t path1 = run/10000;
+ TString path1_string = Form("%d",path1);
- AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
+ AliCDBId id(AliCDBPath("SHUTTLE", fCurrentDetector, path1_string), run, run);
fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
fStatusEntry->SetOwner(1);
+ Int_t logLevel = AliLog::GetGlobalLogLevel();
+ AliLog::SetGlobalLogLevel(AliLog::kError);
+
UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
- if (!result) {
+ AliLog::SetGlobalLogLevel((AliLog::EType_t)logLevel);
+
+ if (!result) {
Log("SHUTTLE", Form("WriteShuttleStatus - Failed for %s, run %d",
fCurrentDetector.Data(), run));
return kFALSE;
}
- SendMLInfo();
+ SendMLDetInfo();
return kTRUE;
}
status->SetStatus(newStatus);
if (increaseCount) status->IncreaseCount();
+ Int_t logLevel = AliLog::GetGlobalLogLevel();
+ AliLog::SetGlobalLogLevel(AliLog::kError);
+
AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
- SendMLInfo();
+ AliLog::SetGlobalLogLevel((AliLog::EType_t)logLevel);
+
+ SendMLDetInfo();
}
//______________________________________________________________________________________________
-void AliShuttle::SendMLInfo()
+void AliShuttle::SendMLDetInfo()
{
//
// sends ML information about the current status of the current detector being processed
AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
if (!status){
- Log("SHUTTLE", "SendMLInfo - UNEXPECTED: status could not be read from current CDB entry");
+ Log("SHUTTLE", "SendMLDetInfo - UNEXPECTED: status could not be read from current CDB entry");
return;
}
return WriteShuttleStatus(status);
}
- // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
+ // The following case shouldn't happen if Shuttle Logbook was correctly updated.
// If it happens it may mean Logbook updating failed... let's do it now!
if (status->GetStatus() == AliShuttleStatus::kDone ||
- status->GetStatus() == AliShuttleStatus::kFailed){
+ status->GetStatus() == AliShuttleStatus::kFailed ||
+ status->GetStatus() == AliShuttleStatus::kSkipped) {
Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
fCurrentDetector.Data(),
status->GetStatusName(status->GetStatus())));
- UpdateShuttleLogbook(fCurrentDetector.Data(),
- status->GetStatusName(status->GetStatus()));
+
+ if (status->GetStatus() == AliShuttleStatus::kSkipped)
+ {
+ UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE");
+ }
+ else
+ UpdateShuttleLogbook(fCurrentDetector.Data(), status->GetStatusName(status->GetStatus()));
+
return kFALSE;
}
// Send mail to detector expert!
Log("SHUTTLE", Form("ContinueProcessing - Sending mail to %s expert...",
fCurrentDetector.Data()));
- if (!SendMail())
+ // det experts in to
+ TString to="";
+ TIter *iterExperts = 0;
+ iterExperts = new TIter(fConfig->GetResponsibles(fCurrentDetector));
+ TObjString *anExpert=0;
+ while ((anExpert = (TObjString*) iterExperts->Next()))
+ {
+ to += Form("%s, \n", anExpert->GetName());
+ }
+ delete iterExperts;
+
+ if (to.Length() > 0)
+ to.Remove(to.Length()-3);
+ AliDebug(2, Form("to: %s",to.Data()));
+
+ if (to.IsNull()) {
+ Log("SHUTTLE", Form("List of %s responsibles not set!", fCurrentDetector.Data()));
+ return kFALSE;
+ }
+
+ Log(fCurrentDetector.Data(), Form("ContinueProcessing - Sending mail to %s expert(s):",
+ fCurrentDetector.Data()));
+ Log(fCurrentDetector.Data(), Form("\n%s", to.Data()));
+ if (!SendMail(kPPEMail))
Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert",
fCurrentDetector.Data()));
return cont;
}
+//______________________________________________________________________________________________
+void AliShuttle::SendMLRunInfo(const char* status)
+{
+ // Send information about this run to ML: two text parameters, "SHUTTLE_status" and "SHUTTLE_runtype", are sent through fMonaLisa under an ID equal to the current run number.
+ // status: text sent as the value of the "SHUTTLE_status" parameter.
+ // The "SHUTTLE_runtype" value is the logbook run type, followed by "(<log>)" when the "log" run parameter is non-empty, and by " (DATest)" when the logbook entry is in DA test mode.
+ TMonaLisaText mlStatus("SHUTTLE_status", status);
+ TString runType(fLogbookEntry->GetRunType());
+ if (strlen(fLogbookEntry->GetRunParameter("log")) > 0){ // NOTE(review): strlen() is applied directly to the returned pointer - confirm GetRunParameter() cannot return null here
+
+ runType += "(";
+ runType += fLogbookEntry->GetRunParameter("log");
+ runType += ")";
+ }
+ if (fLogbookEntry->GetDATestMode()){
+ runType += " (DATest)";
+ }
+ TMonaLisaText mlRunType("SHUTTLE_runtype", runType);
+
+ TList mlList; // list of pointers to the two local parameter objects above
+ mlList.Add(&mlStatus);
+ mlList.Add(&mlRunType);
+
+ TString mlID;
+ mlID.Form("%d", GetCurrentRun()); // the run number, as decimal text, is used as the ID of the report
+ fMonaLisa->SendParameters(&mlList, mlID); // NOTE(review): fMonaLisa and fLogbookEntry are dereferenced unchecked - presumably both are set before this is called; confirm
+}
+
+//______________________________________________________________________________________________
+Int_t AliShuttle::GetMem(Int_t pid)
+{
+ // Invokes "ps -o vsize" for the process <pid> and returns the integer found on the last line of its output.
+ // Returns -1 in case of error: the pipe cannot be opened, nothing can be read from it, or the text cannot be parsed as a non-zero integer.
+ // NOTE(review): the unit is whatever ps reports for vsize (presumably KiB) - confirm against the ps version in use.
+ TString checkStr;
+ checkStr.Form("ps -o vsize --pid %d | tail -n 1", pid); // "tail -n 1" keeps only the last line of the ps output
+ FILE* pipe = gSystem->OpenPipe(checkStr, "r");
+ if (!pipe)
+ {
+ Log("SHUTTLE", Form("Process - Error: " // the log texts in this function carry a "Process - " prefix although they are emitted from GetMem
+ "Could not open pipe to %s", checkStr.Data()));
+ return -1;
+ }
+
+ char buffer[100];
+ if (!fgets(buffer, 100, pipe)) // read at most one line (up to 99 characters) from the pipe
+ {
+ Log("SHUTTLE", "Process - Error: ps did not return anything");
+ gSystem->ClosePipe(pipe); // close the pipe on the failure path as well
+ return -1;
+ }
+ gSystem->ClosePipe(pipe); // everything needed is already in buffer
+
+ //Log("SHUTTLE", Form("ps returned %s", buffer));
+
+ Int_t mem = 0;
+ if ((sscanf(buffer, "%d\n", &mem) != 1) || !mem) // a parsed value of 0 is treated as a failure too
+ {
+ Log("SHUTTLE", "Process - Error: Could not parse output of ps");
+ return -1;
+ }
+
+ return mem;
+}
+
//______________________________________________________________________________________________
Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
{
Log("SHUTTLE", Form("\t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^*",
GetCurrentRun()));
+ CountOpenRuns();
+
// Send the information to ML
- TMonaLisaText mlStatus("SHUTTLE_status", "Processing");
- TMonaLisaText mlRunType("SHUTTLE_runtype", Form("%s (%s)", entry->GetRunType(), entry->GetRunParameter("log")));
-
- TList mlList;
- mlList.Add(&mlStatus);
- mlList.Add(&mlRunType);
-
- TString mlID;
- mlID.Form("%d", GetCurrentRun());
- fMonaLisa->SendParameters(&mlList, mlID);
+ SendMLRunInfo("Processing");
if (fLogbookEntry->IsDone())
{
// Initialization
Bool_t hasError = kFALSE;
- // Set the CDB and Reference folders according to the year and LHC period
- TString lhcPeriod(GetLHCPeriod());
- if (lhcPeriod.Length() == 0)
- {
- Log("SHUTTLE","Process - LHCPeriod not found in logbook!");
- return 0;
- }
-
- if (fgkMainCDB.Length() == 0)
- fgkMainCDB = Form("alien://folder=/alice/data/%d/%s/OCDB?user=alidaq?cacheFold=/tmp/OCDBCache",
- GetCurrentYear(), lhcPeriod.Data());
-
- if (fgkMainRefStorage.Length() == 0)
- fgkMainRefStorage = Form("alien://folder=/alice/data/%d/%s/Reference?user=alidaq?cacheFold=/tmp/OCDBCache",
- GetCurrentYear(), lhcPeriod.Data());
-
+ // Set the CDB and Reference folders according to the year
+
+ // build cdb paths (repeat each time, run might be a DATest run)
+ if (!fLogbookEntry->GetDATestMode()){
+ fgkMainCDB.Form("alien://folder=%s%d/OCDB?user=alidaq?cacheFold=/tmp/OCDBCache",
+ fConfig->GetAlienPath(), GetCurrentYear());
+
+ fgkMainRefStorage.Form("alien://folder=%s%d/Reference?user=alidaq?cacheFold=/tmp/OCDBCache",
+ fConfig->GetAlienPath(), GetCurrentYear());
+ }
+ else {
+ fgkMainCDB.Form("alien://folder=%s%d/DATest/OCDB?user=alidaq?cacheFold=/tmp/OCDBCache",
+ fConfig->GetAlienPath(), GetCurrentYear());
+
+ fgkMainRefStorage.Form("alien://folder=%s%d/DATest/Reference?user=alidaq?cacheFold=/tmp/OCDBCache",
+ fConfig->GetAlienPath(), GetCurrentYear());
+ }
+
+ AliDebug(2,Form("Main CDB storage = %s",fgkMainCDB.Data()));
+ AliDebug(2,Form("Main Reference storage = %s",fgkMainRefStorage.Data()));
+
// Loop on detectors in the configuration
TIter iter(fConfig->GetDetectors());
TObjString* aDetector = 0;
GetCurrentRun(), aDetector->GetName()));
for(Int_t iSys=0;iSys<3;iSys++) fFXSCalled[iSys]=kFALSE;
+
+ Int_t initialMem = GetMem(getpid());
+ Log("SHUTTLE", Form("Memory consumption before forking is %d", initialMem));
Log(fCurrentDetector.Data(), "Process - Starting processing");
if (expiredTime > fConfig->GetPPTimeOut())
{
- TString tmp;
- tmp.Form("Process - Process of %s time out. "
- "Run time: %d seconds. Killing...",
- fCurrentDetector.Data(), expiredTime);
- Log("SHUTTLE", tmp);
- Log(fCurrentDetector, tmp);
+ TString logMsg;
+ AliShuttleStatus *currentStatus = ReadShuttleStatus();
+ AliShuttleStatus::Status newStatus = AliShuttleStatus::kInvalid;
+
+ if (currentStatus->GetStatus() <= AliShuttleStatus::kPPDone)
+ {
+ // in case pp not yet done set status to kPPTimeOut
+
+ logMsg.Form("Process - Process of %s timed out. Run time: %d seconds. Killing...",
+ fCurrentDetector.Data(), expiredTime);
+ newStatus = AliShuttleStatus::kPPTimeOut;
+ }
+ else if (currentStatus->GetStatus() == AliShuttleStatus::kStoreStarted)
+ {
+ // in case the pp goes in TimeOut while storing the objects in the OCDB
+ // set status to kStoreError
+
+ logMsg.Form("Process - Process of %s timed out while storing the OCDB object. Run time: %d seconds. Killing... and setting status to StoreError.",
+ fCurrentDetector.Data(), expiredTime);
+ newStatus = AliShuttleStatus::kStoreError;
+ }
+ else
+ {
+ // in other cases don't change the status
+
+ logMsg.Form("Process - Process of %s timed out in status = %s. Run time: %d seconds. Killing... without changing the status",
+ fCurrentDetector.Data(), currentStatus->GetStatusName(), expiredTime);
+ }
+
+ Log("SHUTTLE", logMsg);
+ Log(fCurrentDetector, logMsg);
kill(pid, 9);
- UpdateShuttleStatus(AliShuttleStatus::kPPTimeOut);
+ if (newStatus != AliShuttleStatus::kInvalid)
+ UpdateShuttleStatus(newStatus);
hasError = kTRUE;
gSystem->Sleep(1000);
{
gSystem->Sleep(1000);
- TString checkStr;
- checkStr.Form("ps -o vsize --pid %d | tail -n 1", pid);
- FILE* pipe = gSystem->OpenPipe(checkStr, "r");
- if (!pipe)
- {
- Log("SHUTTLE", Form("Process - Error: "
- "Could not open pipe to %s", checkStr.Data()));
+ Int_t mem = GetMem(pid);
+
+ if (mem < 0)
continue;
- }
- char buffer[100];
- if (!fgets(buffer, 100, pipe))
- {
- Log("SHUTTLE", "Process - Error: ps did not return anything");
- gSystem->ClosePipe(pipe);
- continue;
- }
- gSystem->ClosePipe(pipe);
-
- //Log("SHUTTLE", Form("ps returned %s", buffer));
-
- Int_t mem = 0;
- if ((sscanf(buffer, "%d\n", &mem) != 1) || !mem)
- {
- Log("SHUTTLE", "Process - Error: Could not parse output of ps");
- continue;
- }
+ mem -= initialMem;
+ if (mem < 0)
+ mem = 0;
if (expiredTime % 60 == 0)
{
}
else if (pid == 0)
{
- // client
- Log("SHUTTLE", Form("Process - In client process of %d - %s", GetCurrentRun(),
+ // child
+ Log("SHUTTLE", Form("Process - In child process of %d - %s", GetCurrentRun(),
aDetector->GetName()));
Log("SHUTTLE", Form("Process - Redirecting output to %s log",fCurrentDetector.Data()));
}
TString wd = gSystem->WorkingDirectory();
- TString tmpDir = Form("%s/%s_%d_process", GetShuttleTempDir(),
- fCurrentDetector.Data(), GetCurrentRun());
+ Int_t dir_lev1 = GetCurrentRun()/10000;
+ TString tmpDir = Form("%s/%d/%d/%s_process", GetShuttleTempDir(),
+ dir_lev1, GetCurrentRun(), fCurrentDetector.Data());
Int_t result = gSystem->GetPathInfo(tmpDir.Data(), 0, (Long64_t*) 0, 0, 0);
if (!result) // temp dir already exists!
gSystem->Exit(1);
}
- Bool_t success = ProcessCurrentDetector();
-
+ Int_t success = ProcessCurrentDetector();
+
gSystem->ChangeDirectory(wd.Data());
- if (success) // Preprocessor finished successfully!
+ if (success == 1) // Preprocessor finished successfully!
{
// remove temporary folder or DCS map
if (!fConfig->KeepTempFolder())
if (StoreOCDB() == kFALSE)
success = kFALSE;
}
- else
+ else if (success == 0)
{
Log("SHUTTLE",
Form("\t\t\t****** run %d - %s: PP ERROR ******",
fFirstUnprocessed[iDet] = kFALSE;
}
}
+ SendMLRunInfo("Pending");
}
}
}
//______________________________________________________________________________________________
-Bool_t AliShuttle::ProcessCurrentDetector()
+Int_t AliShuttle::ProcessCurrentDetector()
{
//
// Makes data retrieval just for a specific detector (fCurrentDetector).
TString wd = gSystem->WorkingDirectory();
if (!CleanReferenceStorage(fCurrentDetector.Data()))
- return kFALSE;
+ return 0;
gSystem->ChangeDirectory(wd.Data());
- TMap* dcsMap = new TMap();
-
// call preprocessor
AliPreprocessor* aPreprocessor =
dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
+ // check if the preprocessor wants to process this run type
+ if (aPreprocessor->ProcessRunType() == kFALSE)
+ {
+ UpdateShuttleStatus(AliShuttleStatus::kSkipped);
+ UpdateShuttleLogbook(fCurrentDetector, "DONE");
+ if (!UpdateTableSkippedCase(fCurrentDetector.Data()))
+ {
+ AliError(Form("Could not update FXS tables for run %d !", GetCurrentRun()));
+ }
+ Log(fCurrentDetector, Form("ProcessCurrentDetector - %s preprocessor is not interested in this run type", fCurrentDetector.Data()));
+
+ return 2;
+ }
+
+ TMap* dcsMap = new TMap();
+
aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
Bool_t processDCS = aPreprocessor->ProcessDCS();
UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
UpdateShuttleStatus(AliShuttleStatus::kDCSError);
delete dcsMap;
- return kFALSE;
+ return 0;
} else {
UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
TMap* aliasMap = 0;
TMap* dpMap = 0;
-
+
if (fConfig->GetDCSAliases(fCurrentDetector, iServ)->GetEntries() > 0)
{
+ Log(fCurrentDetector, Form("Querying %d DCS aliases", fConfig->GetDCSAliases(fCurrentDetector, iServ)->GetEntries()));
aliasMap = GetValueSet(host, port,
fConfig->GetDCSAliases(fCurrentDetector, iServ),
kAlias, multiSplit);
" Sending mail to DCS experts!", host.Data()));
UpdateShuttleStatus(AliShuttleStatus::kDCSError);
- if (!SendMailToDCS())
+ if (!SendMail(kDCSEMail))
Log("SHUTTLE", Form("ProcessCurrentDetector - "
"Could not send mail to DCS experts!"));
delete dcsMap;
- return kFALSE;
+ return 0;
}
}
if (fConfig->GetDCSDataPoints(fCurrentDetector, iServ)->GetEntries() > 0)
{
+ Log(fCurrentDetector, Form("Querying %d DCS data points", fConfig->GetDCSDataPoints(fCurrentDetector, iServ)->GetEntries()));
dpMap = GetValueSet(host, port,
fConfig->GetDCSDataPoints(fCurrentDetector, iServ),
kDP, multiSplit);
" Sending mail to DCS experts!", host.Data()));
UpdateShuttleStatus(AliShuttleStatus::kDCSError);
- if (!SendMailToDCS())
+ if (!SendMail(kDCSEMail))
Log("SHUTTLE", Form("ProcessCurrentDetector - "
"Could not send mail to DCS experts!"));
if (aliasMap) delete aliasMap;
delete dcsMap;
- return kFALSE;
+ return 0;
}
}
// DCS Archive DB processing successful. Call Preprocessor!
UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
- fFXSError = kFALSE; // this variable is kTRUE after ::Process if an FXS error occured
+ fFXSError = -1; // -1 = no FXS error; after ::Process this holds the index of the system whose FXS access failed, if an FXS error occurred
UInt_t returnValue = aPreprocessor->Process(dcsMap);
- if (fFXSError) {
+ if (fFXSError!=-1) {
UpdateShuttleStatus(AliShuttleStatus::kFXSError);
+ SendMail(kFXSEMail, fFXSError);
dcsMap->DeleteAll();
delete dcsMap;
- return kFALSE;
+ return 0;
}
if (returnValue > 0) // Preprocessor error!
UpdateShuttleStatus(AliShuttleStatus::kPPError);
dcsMap->DeleteAll();
delete dcsMap;
- return kFALSE;
+ return 0;
}
// preprocessor ok!
dcsMap->DeleteAll();
delete dcsMap;
- return kTRUE;
+ return 1;
}
//______________________________________________________________________________________________
{
// Query DAQ's Shuttle logbook and sends the number of open runs to ML
+ SendAlive();
+
// check connection, in case connect
if (!Connect(3))
return;
}
// TODO Check field count!
- const UInt_t nCols = 23;
+ const UInt_t nCols = 26;
if (aResult->GetFieldCount() != (Int_t) nCols) {
Log("SHUTTLE", "Invalid SQL result field number!");
delete aResult;
if (!entry)
continue;
+ // DA test mode flag
+ TString daTestModeString(aRow->GetField(2), aRow->GetFieldLength(2)); // field 2 = DA test mode flag
+ Bool_t daTestMode = (Bool_t)daTestModeString.Atoi();
+ entry->SetDATestMode(daTestMode);
+
// loop on detectors
for(UInt_t ii = 0; ii < nCols; ii++)
entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii));
for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
+ delete aRow;
+ delete aResult;
+
UInt_t startTime = entry->GetStartTime();
UInt_t endTime = entry->GetEndTime();
-
-// if (!startTime || !endTime || startTime > endTime)
-// {
-// Log("SHUTTLE",
-// Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d. Skipping!",
-// run, startTime, endTime));
-//
-// Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
-// fLogbookEntry = entry;
-// if (!UpdateShuttleLogbook("shuttle_done"))
-// {
-// AliError(Form("Could not update logbook for run %d !", run));
-// }
-// fLogbookEntry = 0;
-//
-// delete entry;
-// delete aRow;
-// delete aResult;
-// return 0;
-// }
-
- if (!startTime)
+ Bool_t ecsSuccess = entry->GetECSSuccess();
+
+ TString totEventsStr = entry->GetRunParameter("totalEvents");
+ Int_t totEvents = totEventsStr.Atoi();
+
+ UInt_t now = time(0);
+ // TODO make this a configuration parameter
+ Int_t dcsDelay = fConfig->GetDCSDelay()+fConfig->GetDCSQueryOffset();
+
+ // runs are accepted if they have ecsSuccess set or more than 1 event
+ if (startTime != 0 && endTime != 0 && endTime > startTime && (totEvents > 1 || ecsSuccess) && (endTime < now - dcsDelay))
{
- Log("SHUTTLE",
- Form("QueryRunParameters - Invalid parameters for Run %d: "
- "startTime = %d, endTime = %d. Skipping!",
- run, startTime, endTime));
-
- Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
- fLogbookEntry = entry;
- if (!UpdateShuttleLogbook("shuttle_ignored"))
- {
- AliError(Form("Could not update logbook for run %d !", run));
- }
- fLogbookEntry = 0;
+ if (ecsSuccess == kFALSE)
+ Log("SHUTTLE", Form("Processing run %d although in status ECS failure, Reason: %s", run, entry->GetRunParameter("eor_reason")));
+ return entry;
+ }
+
+ Bool_t skip = kFALSE;
- delete entry;
- delete aRow;
- delete aResult;
- return 0;
+ if (endTime != 0 && endTime >= now - dcsDelay)
+ {
+ Log("SHUTTLE", Form("Skipping run %d for now, because DCS buffer time is not yet expired", run));
}
-
- if (startTime && !endTime)
+ else if (totEvents <= 1)
{
- // TODO Here we don't mark SHUTTLE done, because this may mean
- //the run is still ongoing!!
- Log("SHUTTLE",
- Form("QueryRunParameters - Invalid parameters for Run %d: "
- "startTime = %d, endTime = %d. Skipping (Shuttle won't be marked as DONE)!",
- run, startTime, endTime));
-
- //Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
- //fLogbookEntry = entry;
- //if (!UpdateShuttleLogbook("shuttle_done"))
- //{
- // AliError(Form("Could not update logbook for run %d !", run));
- //}
- //fLogbookEntry = 0;
-
- delete entry;
- delete aRow;
- delete aResult;
- return 0;
+ Log("SHUTTLE", Form("QueryRunParameters - Run %d has 1 event or less - Skipping!", run));
+ skip = kTRUE;
}
-
- if (startTime && endTime && (startTime > endTime))
+ else
{
- Log("SHUTTLE",
- Form("QueryRunParameters - Invalid parameters for Run %d: "
- "startTime = %d, endTime = %d. Skipping!",
- run, startTime, endTime));
-
- Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
- fLogbookEntry = entry;
- if (!UpdateShuttleLogbook("shuttle_ignored"))
- {
- AliError(Form("Could not update logbook for run %d !", run));
- }
- fLogbookEntry = 0;
-
- delete entry;
- delete aRow;
- delete aResult;
- return 0;
+ Log("SHUTTLE", Form("QueryRunParameters - Invalid parameters for Run %d: "
+ "startTime = %d, endTime = %d. Skipping (Shuttle won't be marked as DONE)!",
+ run, startTime, endTime));
}
-
- TString totEventsStr = entry->GetRunParameter("totalEvents");
- Int_t totEvents = totEventsStr.Atoi();
- if (totEvents < 1)
+
+ if (skip)
{
- Log("SHUTTLE",
- Form("QueryRunParameters - Run %d has 0 events - Skipping!", run));
-
- Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
- fLogbookEntry = entry;
- if (!UpdateShuttleLogbook("shuttle_ignored"))
+ Log("SHUTTLE", Form("Marking SHUTTLE skipped for run %d", run));
+ fLogbookEntry = entry;
+ if (!UpdateShuttleLogbook("shuttle_skipped"))
{
AliError(Form("Could not update logbook for run %d !", run));
}
+ if (!UpdateTableSkippedCase("ALL"))
+ {
+ AliError(Form("Could not update FXS tables for run %d !", run));
+ }
fLogbookEntry = 0;
-
- delete entry;
- delete aRow;
- delete aResult;
- return 0;
}
-
- delete aRow;
- delete aResult;
-
- return entry;
+
+ delete entry;
+ return 0;
}
//______________________________________________________________________________________________
TMap* result = 0;
if (type == kAlias)
{
- result = client.GetAliasValues(entries, GetCurrentStartTime(),
- GetCurrentEndTime());
+ //result = client.GetAliasValues(entries, GetCurrentStartTime()-offset,
+ // GetCurrentEndTime()+offset);
+ result = client.GetAliasValues(entries, GetStartTimeDCSQuery(),
+ GetEndTimeDCSQuery());
}
else if (type == kDP)
{
- result = client.GetDPValues(entries, GetCurrentStartTime(),
- GetCurrentEndTime());
+ //result = client.GetDPValues(entries, GetCurrentStartTime()-offset,
+ // GetCurrentEndTime()+offset);
+ result = client.GetDPValues(entries, GetStartTimeDCSQuery(),
+ GetEndTimeDCSQuery());
}
if (result == 0)
if (!Connect(system))
{
Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
- fFXSError = kTRUE;
+ fFXSError = system;
return 0;
}
if (!aResult) {
Log(detector, Form("GetFile - Can't execute SQL query to %s database for: id = %s, source = %s",
GetSystemName(system), id, sourceName.Data()));
- fFXSError = kTRUE;
+ fFXSError = system;
return 0;
}
Log(detector,
Form("GetFile - More than one entry in %s FXS db for: id = %s, source = %s",
GetSystemName(system), id, sourceName.Data()));
- fFXSError = kTRUE;
+ fFXSError = system;
delete aResult;
return 0;
}
Log(detector,
Form("GetFileName - Wrong field count in %s FXS db for: id = %s, source = %s",
GetSystemName(system), id, sourceName.Data()));
- fFXSError = kTRUE;
+ fFXSError = system;
delete aResult;
return 0;
}
if (!aRow){
Log(detector, Form("GetFile - Empty set result in %s FXS db from query: id = %s, source = %s",
GetSystemName(system), id, sourceName.Data()));
- fFXSError = kTRUE;
+ fFXSError = system;
delete aResult;
return 0;
}
filePath.Data(), fileSize.Data(), fileChecksum.Data()));
// retrieved file is renamed to make it unique
- TString localFileName = Form("%s/%s_%d_process/%s_%s_%d_%s_%s.shuttle",
- GetShuttleTempDir(), detector, GetCurrentRun(),
+ Int_t dir_lev1 = GetCurrentRun()/10000;
+ TString localFileName = Form("%s/%d/%d/%s_process/%s_%s_%d_%s_%s.shuttle",
+ GetShuttleTempDir(), dir_lev1, GetCurrentRun(), detector,
GetSystemName(system), detector, GetCurrentRun(),
id, sourceName.Data());
+ Log("SHUTTLE",Form("file from FXS = %s",localFileName.Data()));
// file retrieval from FXS
{
// compare md5sum of local file with the one stored in the FXS DB
if(fileChecksum.Contains(' ')) fileChecksum.Resize(fileChecksum.First(' '));
- Int_t md5Comp = gSystem->Exec(Form("md5sum %s |grep %s 2>&1 > /dev/null",
+ Int_t md5Comp = gSystem->Exec(Form("md5sum %s |grep %s > /dev/null 2> /dev/null",
localFileName.Data(), fileChecksum.Data()));
if (md5Comp != 0)
if (!result)
{
- fFXSError = kTRUE;
+ fFXSError = system;
return 0;
}
}
}
- TString baseFXSFolder;
- if (system == kDAQ)
- {
- baseFXSFolder = "FES/";
- }
- else if (system == kDCS)
- {
- baseFXSFolder = "";
- }
- else if (system == kHLT)
- {
- baseFXSFolder = "/opt/FXS/";
- }
-
-
- TString command = Form("scp -oPort=%d -2 %s@%s:%s%s %s",
+ TString command = Form("scp -oPort=%d -2 %s@%s:%s/%s %s",
fConfig->GetFXSPort(system),
fConfig->GetFXSUser(system),
fConfig->GetFXSHost(system),
- baseFXSFolder.Data(),
+ fConfig->GetFXSBaseFolder(system),
fxsFileName,
localFileName);
if (!Connect(system))
{
Log(detector, Form("GetFileSources - Couldn't connect to %s FXS database", GetSystemName(system)));
- fFXSError = kTRUE;
+ fFXSError = system;
return NULL;
}
- TString sourceName = 0;
+ TString sourceName = "";
if (system == kDAQ)
{
sourceName = "DAQsource";
if (!aResult) {
Log(detector, Form("GetFileSources - Can't execute SQL query to %s database for id: %s",
GetSystemName(system), id));
- fFXSError = kTRUE;
+ fFXSError = system;
return 0;
}
return NULL;
}
- TString sourceName = 0;
+ TString sourceName = "";
if (system == kDAQ)
{
sourceName = "DAQsource";
//
// check connection: if already connected return
- if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
+
+ if(fServer[system] && fServer[system]->IsConnected()) {
+ // ping the server
+ if (fServer[system]->PingVerify()==kTRUE){ // connection is still alive
+ return kTRUE;
+ }
+ else{
+ AliWarning(Form("Connection got lost to FXS database for %s. Closing and reconnecting.",
+ AliShuttleInterface::GetSystemName(system)));
+ fServer[system]->Close();
+ delete fServer[system];
+ fServer[system] = 0x0;
+ }
+ }
TString dbHost, dbUser, dbPass, dbName;
return result;
}
-//______________________________________________________________________________________________
-Bool_t AliShuttle::UpdateTableFailCase()
+//_______________________________________________________________________________
+Bool_t AliShuttle::UpdateTableSkippedCase(const char* detector)
{
+ //
// Update FXS table filling time_processed field in all rows corresponding to current run and detector
- // this is called in case the preprocessor is declared failed for the current run, because
- // the fields are updated only in case of success
+ // if detector = "ALL" update all detectors
+ //
Bool_t result = kTRUE;
+ TString detName(detector);
+
for (UInt_t system=0; system<3; system++)
{
+
// check connection, in case connect
if (!Connect(system))
{
- Log(fCurrentDetector, Form("UpdateTableFailCase - Couldn't connect to %s FXS database",
- GetSystemName(system)));
+ Log(fCurrentDetector, Form("UpdateTableSkippedCase - Couldn't connect to %s FXS database", GetSystemName(system)));
result = kFALSE;
continue;
}
TTimeStamp now; // now
// Loop on FXS list entries
+ TIter iter(&fFXSlist[system]);
+
+ TString whereClause;
+ if (detName == "ALL") whereClause = Form("where run=%d and time_processed IS NULL;",GetCurrentRun());
+ else whereClause = Form("where run=%d and detector=\"%s\" and time_processed IS NULL;",GetCurrentRun(), detector);
- TString whereClause = Form("where run=%d and detector=\"%s\";",
- GetCurrentRun(), fCurrentDetector.Data());
-
+ //Log("SHUTTLE",Form(" whereClause = %s ",whereClause.Data()));
TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
- now.GetSec(), whereClause.Data());
+ now.GetSec(), whereClause.Data());
AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
if (!aResult)
{
- Log(fCurrentDetector, Form("UpdateTableFailCase - %s db: can't execute SQL query <%s>",
+ Log("SHUTTLE", Form("UpdateTableSkippedCase - %s db: can't execute SQL query <%s>",
GetSystemName(system), sqlQuery.Data()));
result = kFALSE;
continue;
}
delete aResult;
+
}
return result;
}
-
//______________________________________________________________________________________________
-Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
+Bool_t AliShuttle::UpdateTableFailCase()
{
- //
- // Update Shuttle logbook filling detector or shuttle_done column
- // ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
- //
+ //
+ // Update the FXS tables, filling the time_processed field in all rows that
+ // correspond to the current run and the current detector.
+ // This is called in case the preprocessor is declared failed for the current run,
+ // because the fields are otherwise updated only in case of success.
+ //
+ // All three FXS systems are always attempted. Returns kTRUE only if the
+ // connection and the update query succeeded for every one of them.
+ //
+
+ Bool_t result = kTRUE;
+
+ for (UInt_t system=0; system<3; system++)
+ {
+ // check connection, in case connect
+ if (!Connect(system))
+ {
+ Log(fCurrentDetector, Form("UpdateTableFailCase - Couldn't connect to %s FXS database",
+ GetSystemName(system)));
+ result = kFALSE;
+ continue;
+ }
+
+ TTimeStamp now; // current time, stored as time_processed
+
+ // select every row of the current run and detector
+ TString whereClause = Form("where run=%d and detector=\"%s\";",
+ GetCurrentRun(), fCurrentDetector.Data());
+
+ // TTimeStamp::GetSec() returns a time_t: cast it explicitly so that the
+ // variadic argument really has the type the %d conversion expects
+ TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
+ static_cast<Int_t>(now.GetSec()), whereClause.Data());
+
+ AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
+
+ // Query execution
+ TSQLResult* aResult;
+ aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
+ if (!aResult)
+ {
+ Log(fCurrentDetector, Form("UpdateTableFailCase - %s db: can't execute SQL query <%s>",
+ GetSystemName(system), sqlQuery.Data()));
+ result = kFALSE;
+ continue;
+ }
+ // only the success of the query matters, the result itself is not used
+ delete aResult;
+ }
+
+ return result;
+}
+
+//______________________________________________________________________________________________
+Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
+{
+ //
+ // Update Shuttle logbook filling detector or shuttle_done column
+ // ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
+ //
// check connection, in case connect
if(!Connect(3)){
TString detName(detector);
TString setClause;
- if (detName == "shuttle_done" || detName == "shuttle_ignored")
+ if (detName == "shuttle_done" || detName == "shuttle_skipped")
{
setClause = "set shuttle_done=1";
-
+
if (detName == "shuttle_done")
{
- // Send the information to ML
- TMonaLisaText mlStatus("SHUTTLE_status", "Done");
-
- TList mlList;
- mlList.Add(&mlStatus);
-
- TString mlID;
- mlID.Form("%d", GetCurrentRun());
- fMonaLisa->SendParameters(&mlList, mlID);
+ if (TouchFile() != kTRUE)
+ {
+ SendMLRunInfo("Pending");
+ return kFALSE;
+ }
+
+ SendMLRunInfo("Done");
}
- } else {
+ else
+ SendMLRunInfo("Skipped");
+ }
+ else {
TString statusStr(status);
if(statusStr.Contains("done", TString::kIgnoreCase) ||
statusStr.Contains("failed", TString::kIgnoreCase)){
return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
}
-
//______________________________________________________________________________________________
UInt_t AliShuttle::GetCurrentYear() const
{
//
// Fill log string with a message
//
-
- TString logRunDir = GetShuttleLogDir();
- if (GetCurrentRun() >=0)
- logRunDir += Form("/%d", GetCurrentRun());
+ TString logRunDir = GetShuttleLogDir();
+ if (GetCurrentRun() >=0) {
+ Int_t logDir_lev1 = GetCurrentRun()/10000;
+ logRunDir += Form("/%d/%d", logDir_lev1, GetCurrentRun());
+ }
void* dir = gSystem->OpenDirectory(logRunDir.Data());
if (dir == NULL) {
if (gSystem->mkdir(logRunDir.Data(), kTRUE)) {
gSystem->FreeDirectory(dir);
}
- TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
+ TString toLog = Form("%s UTC (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
if (GetCurrentRun() >= 0)
toLog += Form("run %d - ", GetCurrentRun());
toLog += Form("%s", message);
if (GetCurrentRun() >= 0)
{
- fileName.Form("%s/%d/%s_%d.log", GetShuttleLogDir(), GetCurrentRun(),
- detector, GetCurrentRun());
+ Int_t logDir_lev1 = GetCurrentRun()/10000;
+ fileName.Form("%s/%d/%d/%s.log", GetShuttleLogDir(), logDir_lev1, GetCurrentRun(),
+ detector);
} else {
fileName.Form("%s/%s.log", GetShuttleLogDir(), detector);
}
if (!fMonaLisa)
fMonaLisa = new TMonaLisaWriter(fConfig->GetMonitorHost(), fConfig->GetMonitorTable());
- SendAlive();
CountOpenRuns();
TString whereClause("where shuttle_done=0");
{
// query Shuttle logbook for earlier runs, check if some detectors are unprocessed,
// flag them into fFirstUnprocessed array
- TString whereClause(Form("where shuttle_done=0 and run < %d", run));
+ TString whereClauseBis(Form("where shuttle_done=0 and run < %d", run));
TObjArray tmpLogbookEntries;
- if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries))
+ if (!QueryShuttleLogbook(whereClauseBis, tmpLogbookEntries))
{
Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
return kFALSE;
}
//______________________________________________________________________________________________
-Bool_t AliShuttle::SendMail()
+Bool_t AliShuttle::SendMail(EMailTarget target, Int_t system)
{
//
// sends a mail to the subdetector expert in case of preprocessor error
if (!fConfig->SendMail())
return kTRUE;
+ if (target == kDCSEMail || target == kFXSEMail) {
+ if (!fFirstProcessing)
+ return kTRUE;
+ }
+
+ Int_t runMode = (Int_t)fConfig->GetRunMode();
+ TString tmpStr;
+ if (runMode == 0) tmpStr = " Nightly Test:";
+ else tmpStr = " Data Taking:";
+ void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
+ if (dir == NULL)
+ {
+ if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
+ {
+ Log("SHUTTLE", Form("SendMail - Can't open directory <%s>", GetShuttleLogDir()));
+ return kFALSE;
+ }
+
+ } else {
+ gSystem->FreeDirectory(dir);
+ }
+
+ // det experts in to
TString to="";
- TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
+ TIter *iterExperts = 0;
+ if (target == kDCSEMail) {
+ iterExperts = new TIter(fConfig->GetAdmins(AliShuttleConfig::kAmanda));
+ }
+ else if (target == kFXSEMail) {
+ iterExperts = new TIter(fConfig->GetAdmins(system));
+ }
+ if (iterExperts) {
+ TObjString *anExpert=0;
+ while ((anExpert = (TObjString*) iterExperts->Next()))
+ {
+ to += Form("%s,", anExpert->GetName());
+ }
+ delete iterExperts;
+ }
+
+ // add subdetector experts
+ iterExperts = new TIter(fConfig->GetResponsibles(fCurrentDetector));
TObjString *anExpert=0;
- while ((anExpert = (TObjString*) iterExperts.Next()))
+ while ((anExpert = (TObjString*) iterExperts->Next()))
{
to += Form("%s,", anExpert->GetName());
}
+ delete iterExperts;
+
if (to.Length() > 0)
to.Remove(to.Length()-1);
AliDebug(2, Form("to: %s",to.Data()));
if (to.IsNull()) {
- Log("SHUTTLE", "List of detector responsibles not set!");
+ Log("SHUTTLE", Form("List of %d responsibles not set!", (Int_t) target));
return kFALSE;
}
- void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
- if (dir == NULL)
+ // SHUTTLE responsibles in cc
+ TString cc="";
+ TIter iterAdmins(fConfig->GetAdmins(AliShuttleConfig::kGlobal));
+ TObjString *anAdmin=0;
+ while ((anAdmin = (TObjString*) iterAdmins.Next()))
{
- if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
- {
- Log("SHUTTLE", Form("SendMail - Can't open directory <%s>", GetShuttleLogDir()));
- return kFALSE;
- }
-
- } else {
- gSystem->FreeDirectory(dir);
+ cc += Form("%s,", anAdmin->GetName());
}
+ if (cc.Length() > 0)
+ cc.Remove(cc.Length()-1);
+ AliDebug(2, Form("cc: %s",to.Data()));
+ // mail body
TString bodyFileName;
bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
gSystem->ExpandPathName(bodyFileName);
return kFALSE;
}
- TString cc="";
- TIter iterAdmins(fConfig->GetAdmins(AliShuttleConfig::kGlobal));
- TObjString *anAdmin=0;
- while ((anAdmin = (TObjString*) iterAdmins.Next()))
- {
- cc += Form("%s,", anAdmin->GetName());
- }
- if (cc.Length() > 0)
- cc.Remove(cc.Length()-1);
- AliDebug(2, Form("cc: %s",to.Data()));
- TString subject = Form("%s Shuttle preprocessor FAILED in run %d (run type = %s)!",
- fCurrentDetector.Data(), GetCurrentRun(), GetRunType());
- AliDebug(2, Form("subject: %s", subject.Data()));
+ TString subject;
+ TString body;
+
+ if (target == kDCSEMail){
+ subject = Form("%s CRITICAL Retrieval of data points for %s FAILED in run %d !",
+ tmpStr.Data(), fCurrentDetector.Data(), GetCurrentRun());
+ AliDebug(2, Form("subject: %s", subject.Data()));
+
+ body = Form("Dear DCS experts, \n\n");
+ body += Form("SHUTTLE couldn\'t retrieve the data points for detector %s "
+ "in run %d!!\n\n", fCurrentDetector.Data(), GetCurrentRun());
+ }
+ else if (target == kFXSEMail){
+ subject = Form("%s CRITICAL FXS communication for %s FAILED in run %d !",
+ tmpStr.Data(), fCurrentDetector.Data(), GetCurrentRun());
+ AliDebug(2, Form("subject: %s", subject.Data()));
+ TString sys;
+ if (system == kDAQ) sys="DAQ";
+ else if (system == kDCS) sys="DCS";
+ else if (system == kHLT) sys="HLT";
+ else return kFALSE;
+ body = Form("Dear %s FXS experts, \n\n",sys.Data());
+ body += Form("SHUTTLE couldn\'t retrieve data from the FXS for detector %s "
+ "in run %d!!\n\n", fCurrentDetector.Data(), GetCurrentRun());
+ body += Form("The contacted server was:\nDB: %s\nFXS:%s\n\n", fConfig->GetFXSdbHost(system), fConfig->GetFXSHost(system));
+ }
+ else {
+ subject = Form("%s %s Shuttle preprocessor FAILED in run %d (run type = %s)!",
+ tmpStr.Data(), fCurrentDetector.Data(), GetCurrentRun(), GetRunType());
+ AliDebug(2, Form("subject: %s", subject.Data()));
+
+ body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
+ body += Form("SHUTTLE just detected that your preprocessor "
+ "failed processing run %d (run type = %s)!!\n\n",
+ GetCurrentRun(), GetRunType());
+ }
- TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
- body += Form("SHUTTLE just detected that your preprocessor "
- "failed processing run %d (run type = %s)!!\n\n",
- GetCurrentRun(), GetRunType());
body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n",
fCurrentDetector.Data());
if (fConfig->GetRunMode() == AliShuttleConfig::kTest)
{
- body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
+ body += Form("\thttp://pcalimonitor.cern.ch/shuttle.jsp?time=24 \n\n");
} else {
- body += Form("\thttp://pcalimonitor.cern.ch/shuttle.jsp?instance=PROD&time=168 \n\n");
+ body += Form("\thttp://pcalimonitor.cern.ch/shuttle.jsp?instance=PROD&time=24 \n\n");
}
body += Form("Find the %s log for the current run on \n\n"
- "\thttp://pcalishuttle01.cern.ch:8880/%s/%d/%s_%d.log \n\n",
- fCurrentDetector.Data(), logFolder.Data(), GetCurrentRun(),
- fCurrentDetector.Data(), GetCurrentRun());
- body += Form("The last 10 lines of %s log file are following:\n\n", fCurrentDetector.Data());
+ "\thttp://pcalishuttle02.cern.ch/%s/%d/%d/%s.log \n\n",
+ fCurrentDetector.Data(), logFolder.Data(), GetCurrentRun()/10000,
+ GetCurrentRun(), fCurrentDetector.Data());
+ body += Form("The last 15 lines of %s log file are following:\n\n", fCurrentDetector.Data());
AliDebug(2, Form("Body begin: %s", body.Data()));
mailBody.close();
mailBody.open(bodyFileName, ofstream::out | ofstream::app);
- TString logFileName = Form("%s/%d/%s_%d.log", GetShuttleLogDir(),
- GetCurrentRun(), fCurrentDetector.Data(), GetCurrentRun());
- TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
+ TString logFileName = Form("%s/%d/%d/%s.log", GetShuttleLogDir(),
+ GetCurrentRun()/10000, GetCurrentRun(), fCurrentDetector.Data());
+ TString tailCommand = Form("tail -n 15 %s >> %s", logFileName.Data(), bodyFileName.Data());
if (gSystem->Exec(tailCommand.Data()))
{
mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
return result == 0;
}
-
//______________________________________________________________________________________________
-Bool_t AliShuttle::SendMailToDCS()
+const char* AliShuttle::GetRunType()
{
//
- // sends a mail to the DCS Amanda experts in case of DCS data point retrieval error
+ // returns the run type stored in the current logbook entry (fLogbookEntry); logs an error and returns 0 if there is no logbook entry
//
-
- if (fTestMode != kNone)
- return kTRUE;
- if (!fConfig->SendMail())
- return kTRUE;
-
- if (!fFirstProcessing)
- return kTRUE;
-
- void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
- if (dir == NULL)
- {
- if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
- {
- Log("SHUTTLE", Form("SendMailToDCS - Can't open directory <%s>", GetShuttleLogDir()));
- return kFALSE;
- }
-
- } else {
- gSystem->FreeDirectory(dir);
+ if(!fLogbookEntry) { // no logbook entry to read the run type from
+ AliError("No logbook entry!");
+ return 0; // null pointer: callers must check before using the string
}
- TString bodyFileName;
- bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
- gSystem->ExpandPathName(bodyFileName);
-
- ofstream mailBody;
- mailBody.open(bodyFileName, ofstream::out);
+ return fLogbookEntry->GetRunType(); // forwarded unchanged from the logbook entry
+}
- if (!mailBody.is_open())
- {
- Log("SHUTTLE", Form("SendMailToDCS - Could not open mail body file %s", bodyFileName.Data()));
- return kFALSE;
- }
+//______________________________________________________________________________________________
+Bool_t AliShuttle::GetHLTStatus()
+{
+ // Return HLT status (ON=1 OFF=0)
+ // Converts the HLT status from the mode string read in the run logbook (not just a bool):
+ // the decision is taken on the first character of the "HLTmode" run parameter,
+ // "A" gives kFALSE, "B", "C", "D" or "E" give kTRUE, anything else is logged as
+ // unexpected and gives kFALSE. Also returns 0 (with an error) if there is no logbook entry.
- TString to="";
- TIter iterExperts(fConfig->GetAdmins(AliShuttleConfig::kAmanda));
- TObjString *anExpert=0;
- while ((anExpert = (TObjString*) iterExperts.Next()))
- {
- to += Form("%s,", anExpert->GetName());
+ if(!fLogbookEntry) {
+ AliError("No logbook entry!");
+ return 0;
}
- if (to.Length() > 0)
- to.Remove(to.Length()-1);
- AliDebug(2, Form("to: %s",to.Data()));
- if (to.IsNull()) {
- Log("SHUTTLE", "List of Amanda server administrators not set!");
+ // the HLT mode is read from the "HLTmode" run parameter of the logbook entry
+ TString hltMode = fLogbookEntry->GetRunParameter("HLTmode");
+ // copy the first character into its own TString: TSubString::Data() is not
+ // terminated at the end of the substring, so printing it would show the whole mode string
+ TString firstChar(hltMode(0,1));
+ AliDebug(2,Form("First char = %s ",firstChar.Data()));
+ if (firstChar == "A") {
return kFALSE;
}
-
- TString cc="";
- TIter iterAdmins(fConfig->GetAdmins(AliShuttleConfig::kGlobal));
- TObjString *anAdmin=0;
- while ((anAdmin = (TObjString*) iterAdmins.Next()))
- {
- cc += Form("%s,", anAdmin->GetName());
+ else if ((firstChar == "B") || (firstChar == "C") || (firstChar == "D") || (firstChar == "E")) {
+ return kTRUE;
}
- if (cc.Length() > 0)
- cc.Remove(cc.Length()-1);
- AliDebug(2, Form("cc: %s",to.Data()));
+ else {
+ Log("SHUTTLE","Unexpected HLT mode! Returning 0....");
+ return kFALSE;
+ }
+}
- TString subject = Form("Retrieval of data points for %s FAILED in run %d !",
- fCurrentDetector.Data(), GetCurrentRun());
- AliDebug(2, Form("subject: %s", subject.Data()));
+//______________________________________________________________________________________________
+const char* AliShuttle::GetTriggerConfiguration()
+{
+ // Receives the trigger configuration from the DAQ logbook for the current run
+ // (column configFile of table logbook_trigger_config).
+ // Returns 0 if the connection, the query or the row retrieval fails, or if no row is found.
+ // The returned pointer refers to a function-local static string: it stays valid
+ // after the call, but its content is replaced by the next successful call.
+
+ // check connection, if needed reconnect
+ if (!Connect(3))
+ return 0;
- TString body = Form("Dear DCS experts, \n\n");
- body += Form("SHUTTLE couldn\'t retrieve the data points for detector %s "
- "in run %d!!\n\n", fCurrentDetector.Data(), GetCurrentRun());
- body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n",
- fCurrentDetector.Data());
- if (fConfig->GetRunMode() == AliShuttleConfig::kTest)
+ TString sqlQuery;
+ sqlQuery.Form("SELECT configFile FROM logbook_trigger_config WHERE run = %d", GetCurrentRun());
+ TSQLResult* result = fServer[3]->Query(sqlQuery);
+ if (!result)
{
- body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
- } else {
- body += Form("\thttp://pcalimonitor.cern.ch/shuttle.jsp?instance=PROD?time=168 \n\n");
+ Log("SHUTTLE", Form("ERROR: Can't execute query <%s>!", sqlQuery.Data()));
+ return 0;
}
-
- TString logFolder = "logs";
- if (fConfig->GetRunMode() == AliShuttleConfig::kProd)
- logFolder += "_PROD";
+ if (result->GetRowCount() == 0)
+ {
+ Log("SHUTTLE", "WARNING: Trigger configuration not found in logbook_trigger_config");
+ delete result;
+ return 0;
+ }
- body += Form("Find the %s log for the current run on \n\n"
- "\thttp://pcalishuttle01.cern.ch:8880/%s/%d/%s_%d.log \n\n",
- fCurrentDetector.Data(), logFolder.Data(), GetCurrentRun(),
- fCurrentDetector.Data(), GetCurrentRun());
- body += Form("The last 10 lines of %s log file are following:\n\n", fCurrentDetector.Data());
-
- AliDebug(2, Form("Body begin: %s", body.Data()));
-
- mailBody << body.Data();
- mailBody.close();
- mailBody.open(bodyFileName, ofstream::out | ofstream::app);
-
- TString logFileName = Form("%s/%d/%s_%d.log", GetShuttleLogDir(), GetCurrentRun(),
- fCurrentDetector.Data(), GetCurrentRun());
- TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
- if (gSystem->Exec(tailCommand.Data()))
+ TSQLRow* row = result->Next();
+ if (!row)
{
- mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
+ Log("SHUTTLE", "ERROR: Could not receive logbook_trigger_config data");
+ delete result;
+ return 0;
}
- TString endBody = Form("------------------------------------------------------\n\n");
- endBody += Form("In case of problems please contact the SHUTTLE core team.\n\n");
- endBody += "Please do not answer this message directly, it is automatically generated.\n\n";
- endBody += "Greetings,\n\n \t\t\tthe SHUTTLE\n";
-
- AliDebug(2, Form("Body end: %s", endBody.Data()));
-
- mailBody << endBody.Data();
-
- mailBody.close();
-
- // send mail!
- TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
- subject.Data(),
- cc.Data(),
- to.Data(),
- bodyFileName.Data());
- AliDebug(2, Form("mail command: %s", mailCommand.Data()));
-
- Bool_t result = gSystem->Exec(mailCommand.Data());
-
- return result == 0;
+ // static, so that pointer remains valid when it is returned to the calling class.
+ // The value is assigned on every call: an initializer on the static declaration
+ // would be evaluated only on the first call, and later calls would keep
+ // returning the value found by that first call
+ static TString triggerConfig;
+ triggerConfig = row->GetField(0);
+
+ delete row;
+ row = 0;
+
+ delete result;
+ result = 0;
+
+ Log("SHUTTLE", Form("Found trigger configuration: %s", triggerConfig.Data()));
+
+ return triggerConfig;
}
//______________________________________________________________________________________________
-const char* AliShuttle::GetRunType()
+const char* AliShuttle::GetCTPTimeParams()
{
- //
- // returns run type read from "run type" logbook
- //
+ // Receives the CTP time parameters from the DAQ logbook for the current run
+ // (column alignmentFile of table logbook_trigger_config).
+ // Returns 0 if the connection, the query or the row retrieval fails, or if no row is found.
+ // The returned pointer refers to a function-local static string: it stays valid
+ // after the call, but its content is replaced by the next successful call.
+
+ // check connection, if needed reconnect
+ if (!Connect(3))
+ return 0;
- if(!fLogbookEntry) {
- AliError("No logbook entry!");
+ TString sqlQuery;
+ sqlQuery.Form("SELECT alignmentFile FROM logbook_trigger_config WHERE run = %d", GetCurrentRun());
+ TSQLResult* result = fServer[3]->Query(sqlQuery);
+ if (!result)
+ {
+ Log("SHUTTLE", Form("ERROR: Can't execute query <%s>!", sqlQuery.Data()));
+ return 0;
+ }
+
+ if (result->GetRowCount() == 0)
+ {
+ Log("SHUTTLE", "WARNING: CTP time params not found in logbook_trigger_config");
+ delete result;
+ return 0;
+ }
+
+ TSQLRow* row = result->Next();
+ if (!row)
+ {
+ Log("SHUTTLE", "ERROR: Could not receive logbook_trigger_config data");
+ delete result;
return 0;
}
- return fLogbookEntry->GetRunType();
+ // static, so that pointer remains valid when it is returned to the calling class.
+ // The value is assigned on every call: an initializer on the static declaration
+ // would be evaluated only on the first call, and later calls would keep
+ // returning the value found by that first call
+ static TString triggerTimeParams;
+ triggerTimeParams = row->GetField(0);
+
+ delete row;
+ row = 0;
+
+ delete result;
+ result = 0;
+
+ Log("SHUTTLE", Form("Found trigger time parameters: %s", triggerTimeParams.Data()));
+
+ return triggerTimeParams;
}
//______________________________________________________________________________________________
-Bool_t AliShuttle::GetHLTStatus()
+const char* AliShuttle::GetTriggerDetectorMask()
{
- // Return HLT status (ON=1 OFF=0)
- // Converts the HLT status from the status string read in the run logbook (not just a bool)
+ // Receives the trigger detector mask from DAQ logbook: the bitwise OR of
+ // inputDetectorMask over the rows of logbook_trigger_clusters for the current run,
+ // as the string produced by the SQL BIN() function.
+ // Returns 0 if the connection, the query or the row retrieval fails, or if no row is found.
+ // The returned pointer refers to a function-local static string: it stays valid
+ // after the call, but its content is replaced by the next successful call.
+
+ // check connection, if needed reconnect
+ if (!Connect(3))
+ return 0;
- if(!fLogbookEntry) {
- AliError("No logbook entry!");
+ TString sqlQuery;
+ sqlQuery.Form("SELECT BIN(BIT_OR(inputDetectorMask)) from logbook_trigger_clusters WHERE run = %d;", GetCurrentRun());
+ TSQLResult* result = fServer[3]->Query(sqlQuery);
+ if (!result)
+ {
+ Log("SHUTTLE", Form("ERROR: Can't execute query <%s>!", sqlQuery.Data()));
+ return 0;
+ }
+
+ if (result->GetRowCount() == 0)
+ {
+ Log("SHUTTLE", "ERROR: Trigger Detector Mask not found in logbook_trigger_clusters");
+ delete result;
+ return 0;
+ }
+
+ TSQLRow* row = result->Next();
+ if (!row)
+ {
+ Log("SHUTTLE", "ERROR: Could not receive logbook_trigger_clusters data");
+ delete result;
return 0;
}
- // TODO implement when HLTStatus is inserted in run logbook
- //TString hltStatus = fLogbookEntry->GetRunParameter("HLTStatus");
- //if(hltStatus == "OFF") {return kFALSE};
-
- return kTRUE;
+ // static, so that pointer remains valid when it is returned to the calling class.
+ // The value is assigned on every call: an initializer on the static declaration
+ // would be evaluated only on the first call, and later calls would keep
+ // returning the value found by that first call
+ static TString triggerDetectorMask;
+ triggerDetectorMask = row->GetField(0);
+
+ delete row;
+ row = 0;
+
+ delete result;
+ result = 0;
+
+ Log("SHUTTLE", Form("Found Trigger Detector Mask: %s", triggerDetectorMask.Data()));
+
+ return triggerDetectorMask;
}
//______________________________________________________________________________________________
fgkShuttleLogDir = gSystem->ExpandPathName(logDir);
}
+//______________________________________________________________________________________________
+Bool_t AliShuttle::TouchFile()
+{
+	//
+	// Touches the file <alien path><year>/SHUTTLE_DONE/<run> on the grid,
+	// to be called when the run has been DONE.
+	// The SHUTTLE_DONE directory is created first if the "ls" check does not find it.
+	// Returns kTRUE only if the touch command reports "1" as its result.
+	//
+
+	if (!gGrid)
+	{
+		Log("SHUTTLE",Form("No TGrid connection estabilished!"));
+		Log("SHUTTLE",Form("Could not touch file for run %i",GetCurrentRun()));
+		return kFALSE;
+	}
+
+	TString doneDir;
+	doneDir.Form("%s%d/SHUTTLE_DONE", fConfig->GetAlienPath(), GetCurrentYear());
+
+	// step 1: list the directory to find out whether it is already there
+	TString lsCommand;
+	lsCommand.Form("ls %s",doneDir.Data());
+	TGridResult *lsResult = dynamic_cast<TGridResult*>(gGrid->Command(lsCommand));
+	if (!lsResult){
+		Log("SHUTTLE",Form("No result for %s command, returning without touching",lsCommand.Data()));
+		return kFALSE;
+	}
+	TMap *lsMap = dynamic_cast<TMap*>(lsResult->At(0));
+	if (!lsMap){
+		Log("SHUTTLE",Form("No map for %s command, returning without touching",lsCommand.Data()));
+		delete lsResult;
+		return kFALSE;
+	}
+
+	// NOTE(review): the directory counts as found only when CompareTo() yields exactly 1;
+	// an exact match of "path" with the directory name (0) takes the creation branch -
+	// confirm this against what the grid "ls" really reports in "path"
+	TObjString *lsPath = dynamic_cast<TObjString*>(lsMap->GetValue("path"));
+	const Bool_t dirFound = lsPath && (lsPath->GetString()).CompareTo(doneDir)==1;
+	if (dirFound) {
+		Log("SHUTTLE",Form("Directory %s correctly found for run %i",doneDir.Data(),GetCurrentRun()));
+	}
+	else {
+		Log("SHUTTLE",Form("No directory %s found, creating it",doneDir.Data()));
+
+		// step 1b: create the missing directory
+		const Bool_t created = gGrid->Mkdir(doneDir.Data());
+		if (!created) {
+			Log("SHUTTLE",Form("Impossible to create dir %s in alien catalogue for run %i!",doneDir.Data(),GetCurrentRun()));
+			delete lsResult;
+			return kFALSE;
+		}
+		Log("SHUTTLE",Form("Directory %s successfully created in alien catalogue for run %i",doneDir.Data(),GetCurrentRun()));
+	}
+	delete lsResult;
+
+	// step 2: touch the file named after the run
+	TString touchCommand;
+	touchCommand.Form("touch %s/%i", doneDir.Data(), GetCurrentRun());
+	Log("SHUTTLE", Form("Creating entry in file catalog: %s", touchCommand.Data()));
+	TGridResult *touchResult = dynamic_cast<TGridResult*>(gGrid->Command(touchCommand));
+	if (!touchResult){
+		Log("SHUTTLE",Form("No result for touching command, returning without touching for run %i",GetCurrentRun()));
+		return kFALSE;
+	}
+
+	// step 3: inspect the answer; the first problem met selects the message to log
+	TMap *touchMap = dynamic_cast<TMap*>(touchResult->At(0));
+	TObjString *touchStatus = 0;
+	if (touchMap)
+		touchStatus = dynamic_cast<TObjString*>(touchMap->GetValue("__result__"));
+
+	const char *failureFormat = 0;
+	if (!touchMap)
+		failureFormat = "No map for touching command, returning without touching for run %i";
+	else if (!touchStatus)
+		failureFormat = "No value for \"__result__\" key set in the map for touching command, returning without touching for run %i";
+	else if (touchStatus->GetString()!="1")
+		failureFormat = "Failing the touching command, returning without touching for run %i";
+
+	if (failureFormat) {
+		Log("SHUTTLE",Form(failureFormat,GetCurrentRun()));
+		delete touchResult;
+		return kFALSE;
+	}
+
+	delete touchResult;
+	Log("SHUTTLE", "Sucessfully touched the file");
+	return kTRUE;
+}
+//______________________________________________________________________________________________
+UInt_t AliShuttle::GetStartTimeDCSQuery()
+{
+	// Start time to be used for the DCS query:
+	// the start time of the current run moved back by the
+	// DCS query offset taken from the configuration (fConfig)
+
+	UInt_t queryStart = GetCurrentStartTime()-fConfig->GetDCSQueryOffset();
+	return queryStart;
+}
+//______________________________________________________________________________________________
+UInt_t AliShuttle::GetEndTimeDCSQuery()
+{
+	// End time to be used for the DCS query:
+	// the end time of the current run moved forward by the
+	// DCS query offset taken from the configuration (fConfig)
+
+	UInt_t queryEnd = GetCurrentEndTime()+fConfig->GetDCSQueryOffset();
+	return queryEnd;
+}
+