#include <TFile.h>
#include <TGrid.h>
#include <TGridResult.h>
+#include <TMap.h>
#include <TMonaLisaWriter.h>
fPreprocessorMap(),
fLogbookEntry(0),
fCurrentDetector(),
+fFirstProcessing(0),
+fFXSError(-1),
fStatusEntry(0),
fMonitoringMutex(0),
fLastActionTime(0),
// Then calls StoreRefFilesToGrid to store reference files.
//
+ UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
+
if (fTestMode & kErrorGrid)
{
Log("SHUTTLE", "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
}
Log("SHUTTLE","StoreOCDB - Storing OCDB data ...");
- Bool_t resultCDB = StoreOCDB(fgkMainCDB);
+ Int_t resultCDB = StoreOCDB(fgkMainCDB);
Log("SHUTTLE","StoreOCDB - Storing reference data ...");
- Bool_t resultRef = StoreOCDB(fgkMainRefStorage);
+ Int_t resultRef = StoreOCDB(fgkMainRefStorage);
Log("SHUTTLE","StoreOCDB - Storing reference files ...");
Bool_t resultRefFiles = CopyFilesToGrid("reference");
resultMetadata = CopyFilesToGrid("metadata");
}
- return resultCDB && resultRef && resultRefFiles && resultMetadata;
+ Int_t storeResult = 0;
+
+ if (resultCDB < 0 || resultRef < 0 || resultRefFiles == kFALSE || resultMetadata == kFALSE)
+ storeResult = -1;
+ else if (resultCDB > 0 || resultRef > 0)
+ storeResult = 1;
+
+ if (storeResult < 0)
+ {
+ Log("SHUTTLE",
+ Form("\t\t\t****** run %d - %s: STORAGE ERROR ******",
+ GetCurrentRun(), fCurrentDetector.Data()));
+ UpdateShuttleStatus(AliShuttleStatus::kStoreError);
+ }
+ else if (storeResult > 0)
+ {
+ Log("SHUTTLE",
+ Form("\t\t\t****** run %d - %s: STORAGE DELAYED ******",
+ GetCurrentRun(), fCurrentDetector.Data()));
+ UpdateShuttleStatus(AliShuttleStatus::kStoreDelayed);
+ }
+ else if (storeResult == 0)
+ {
+ Log("SHUTTLE",
+ Form("\t\t\t****** run %d - %s: DONE ******",
+ GetCurrentRun(), fCurrentDetector.Data()));
+ UpdateShuttleStatus(AliShuttleStatus::kDone);
+ UpdateShuttleLogbook(fCurrentDetector, "DONE");
+ }
+
+ return (storeResult == 0);
}
//______________________________________________________________________________________________
-Bool_t AliShuttle::StoreOCDB(const TString& gridURI)
+Int_t AliShuttle::StoreOCDB(const TString& gridURI)
{
//
// Called by StoreOCDB(), performs actual storage to the main OCDB and reference storages (Grid)
//
+ // Return code:
+ // -2 initialization error
+ // -1 storage error
+ // 0 success
+ // 1 storage delayed (e.g. previous unprocessed runs)
+ //
TObjArray* gridIds=0;
Bool_t result = kTRUE;
+ Bool_t delayed = kFALSE;
const char* type = 0;
TString localURI;
localURI = fgkLocalRefStorage;
} else {
AliError(Form("Invalid storage URI: %s", gridURI.Data()));
- return kFALSE;
+ return -2;
}
AliCDBManager* man = AliCDBManager::Instance();
if(!gridSto) {
Log("SHUTTLE",
Form("StoreOCDB - cannot activate main %s storage", type));
- return kFALSE;
+ return -2;
}
gridIds = gridSto->GetQueryCDBList();
if(!localSto) {
Log("SHUTTLE",
Form("StoreOCDB - cannot activate local %s storage", type));
- return kFALSE;
+ return -2;
}
AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*");
// Local objects were stored with current run as Grid version!
AliCDBId aLocId = aLocEntry->GetId();
aLocEntry->SetVersion(-1);
aLocEntry->SetSubVersion(-1);
+
+ Log(fCurrentDetector.Data(), Form("Attempting to store %s", aLocId.ToString().Data()));
// If local object is valid up to infinity we store it only if it is
// the first unprocessed run!
Log("SHUTTLE", Form("StoreOCDB - %s: object %s has validity infinite but "
"there are previous unprocessed runs!",
fCurrentDetector.Data(), aLocId.GetPath().Data()));
- result = kFALSE;
+ Log(fCurrentDetector.Data(), Form("StoreOCDB - %s: object %s has validity infinite but "
+ "there are previous unprocessed runs!",
+ fCurrentDetector.Data(), aLocId.GetPath().Data()));
+ delayed = kTRUE;
continue;
}
Bool_t store = kTRUE;
TIter gridIter(gridIds);
AliCDBId* aGridId = 0;
- while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
- if(aGridId->GetPath() != aLocId.GetPath()) continue;
+ while ((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))) {
+ if (aGridId->GetPath() != aLocId.GetPath())
+ continue;
// skip all objects valid up to infinity
- if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
+ if (aGridId->GetLastRun() == AliCDBRunRange::Infinity())
+ continue;
+
// if we get here, it means there's already some more recent object stored on Grid!
+ Log(fCurrentDetector.Data(),
+ Form("StoreOCDB - A more recent object already exists in %s storage: <%s>",
+ type, aGridId->ToString().Data()));
+
store = kFALSE;
break;
}
- // If we get here, the file can be stored!
- Bool_t storeOk = gridSto->Put(aLocEntry);
- if(!store || storeOk){
-
- if (!store)
- {
- Log(fCurrentDetector.Data(),
- Form("StoreOCDB - A more recent object already exists in %s storage: <%s>",
- type, aGridId->ToString().Data()));
- } else {
+ Bool_t storeOk = kFALSE;
+ if (store)
+ {
+ Log(fCurrentDetector.Data(), Form("Prechecks succeeded. Ready to store %s", aLocId.ToString().Data()));
+ storeOk = gridSto->Put(aLocEntry);
+ if (storeOk) {
Log("SHUTTLE",
- Form("StoreOCDB - Object <%s> successfully put into %s storage",
- aLocId.ToString().Data(), type));
+ Form("StoreOCDB - Object <%s> successfully put into %s storage",
+ aLocId.ToString().Data(), type));
Log(fCurrentDetector.Data(),
Form("StoreOCDB - Object <%s> successfully put into %s storage",
- aLocId.ToString().Data(), type));
+ aLocId.ToString().Data(), type));
+ } else {
+ Log("SHUTTLE",
+ Form("StoreOCDB - Grid %s storage of object <%s> failed",
+ type, aLocId.ToString().Data()));
+ Log(fCurrentDetector.Data(),
+ Form("StoreOCDB - Grid %s storage of object <%s> failed",
+ type, aLocId.ToString().Data()));
+ result = kFALSE;
}
-
- // removing local filename...
+ }
+
+ if (!store || storeOk) {
+ // removing local file...
TString filename;
localSto->IdToFilename(aLocId, filename);
Log("SHUTTLE", Form("StoreOCDB - Removing local file %s", filename.Data()));
RemoveFile(filename.Data());
- continue;
- } else {
- Log("SHUTTLE",
- Form("StoreOCDB - Grid %s storage of object <%s> failed",
- type, aLocId.ToString().Data()));
- Log(fCurrentDetector.Data(),
- Form("StoreOCDB - Grid %s storage of object <%s> failed",
- type, aLocId.ToString().Data()));
- result = kFALSE;
}
}
localEntries->Clear();
- return result;
+ Int_t returnCode = 0;
+
+ if (result == kFALSE)
+ returnCode = -1;
+ else if (delayed != kFALSE)
+ returnCode = 1;
+
+ Log("SHUTTLE", Form("StoreOCDB - Returning with %d (result = %d, delayed = %d)", returnCode, result, delayed));
+ Log(fCurrentDetector.Data(), Form("StoreOCDB - Returning with %d (result = %d, delayed = %d)", returnCode, result, delayed));
+
+ return returnCode;
}
//______________________________________________________________________________________________
lhcPeriod.Data()));
}
- TString target = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/raw/%s",
- localBaseFolder.Data(), GetCurrentYear(),
+ TString target = Form("%s/GRP/RunMetadata%s%d/%s/%09d/raw/%s",
+ localBaseFolder.Data(), fConfig->GetAlienPath(), GetCurrentYear(),
lhcPeriod.Data(), GetCurrentRun(), gridFileName);
return CopyFileLocally(localFile, target);
lhcPeriod.Append(Form("_%s", partition.Data()));
}
- dir = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/raw",
- localBaseFolder.Data(), GetCurrentYear(),
+ dir = Form("%s/GRP/RunMetadata%s%d/%s/%09d/raw",
+ localBaseFolder.Data(), fConfig->GetAlienPath(), GetCurrentYear(),
lhcPeriod.Data(), GetCurrentRun());
- alienDir = dir(dir.Index("/alice/data/"), dir.Length());
+ alienDir = dir(dir.Index(fConfig->GetAlienPath()), dir.Length());
begin = "";
}
return kFALSE;
}
- SendMLInfo();
+ SendMLDetInfo();
return kTRUE;
}
AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
- SendMLInfo();
+ SendMLDetInfo();
}
//______________________________________________________________________________________________
-void AliShuttle::SendMLInfo()
+void AliShuttle::SendMLDetInfo()
{
//
// sends ML information about the current status of the current detector being processed
AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
if (!status){
- Log("SHUTTLE", "SendMLInfo - UNEXPECTED: status could not be read from current CDB entry");
+ Log("SHUTTLE", "SendMLDetInfo - UNEXPECTED: status could not be read from current CDB entry");
return;
}
// checks if the processing should be continued
// if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
- if (!fConfig->HostProcessDetector(fCurrentDetector)) return kFALSE;
+ if (!fConfig->HostProcessDetector(fCurrentDetector))
+ return kFALSE;
AliPreprocessor* aPreprocessor =
dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
AliShuttleLogbookEntry::Status entryStatus =
fLogbookEntry->GetDetectorStatus(fCurrentDetector);
- if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
+ if (entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
Log("SHUTTLE", Form("ContinueProcessing - %s is %s",
fCurrentDetector.Data(),
fLogbookEntry->GetDetectorStatusName(entryStatus)));
}
}
+ // Is the subdetector processed first time for this run?
+ fFirstProcessing = kFALSE;
+
AliShuttleStatus* status = ReadShuttleStatus();
if (!status) {
// first time
Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
fCurrentDetector.Data()));
status = new AliShuttleStatus(AliShuttleStatus::kStarted);
+ fFirstProcessing = kTRUE;
return WriteShuttleStatus(status);
}
- // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
+ // The following case shouldn't happen if Shuttle Logbook was correctly updated.
// If it happens it may mean Logbook updating failed... let's do it now!
if (status->GetStatus() == AliShuttleStatus::kDone ||
- status->GetStatus() == AliShuttleStatus::kFailed){
+ status->GetStatus() == AliShuttleStatus::kFailed ||
+ status->GetStatus() == AliShuttleStatus::kSkipped) {
Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
fCurrentDetector.Data(),
status->GetStatusName(status->GetStatus())));
- UpdateShuttleLogbook(fCurrentDetector.Data(),
- status->GetStatusName(status->GetStatus()));
+
+ if (status->GetStatus() == AliShuttleStatus::kSkipped)
+ {
+ UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE");
+ }
+ else
+ UpdateShuttleLogbook(fCurrentDetector.Data(), status->GetStatusName(status->GetStatus()));
+
return kFALSE;
}
- if (status->GetStatus() == AliShuttleStatus::kStoreStarted || status->GetStatus() == AliShuttleStatus::kStoreError) {
+ if (status->GetStatus() == AliShuttleStatus::kStoreStarted || status->GetStatus() == AliShuttleStatus::kStoreDelayed ||status->GetStatus() == AliShuttleStatus::kStoreError) {
Log("SHUTTLE",
Form("ContinueProcessing - %s: Grid storage of one or more "
"objects failed. Trying again now",
fCurrentDetector.Data()));
- UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
- if (StoreOCDB()){
- Log("SHUTTLE", Form("ContinueProcessing - %s: all objects "
- "successfully stored into main storage",
- fCurrentDetector.Data()));
- UpdateShuttleStatus(AliShuttleStatus::kDone);
- UpdateShuttleLogbook(fCurrentDetector, "DONE");
- } else {
- Log("SHUTTLE",
- Form("ContinueProcessing - %s: Grid storage failed again",
- fCurrentDetector.Data()));
- UpdateShuttleStatus(AliShuttleStatus::kStoreError);
- }
+ StoreOCDB();
return kFALSE;
}
// Send mail to detector expert!
Log("SHUTTLE", Form("ContinueProcessing - Sending mail to %s expert...",
- fCurrentDetector.Data()));
- if (!SendMail())
+ fCurrentDetector.Data()));
+ if (!SendMail(kPPEMail))
Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert",
- fCurrentDetector.Data()));
+ fCurrentDetector.Data()));
} else {
Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. "
status->GetStatusName(), status->GetCount()));
Bool_t increaseCount = kTRUE;
if (status->GetStatus() == AliShuttleStatus::kDCSError ||
- status->GetStatus() == AliShuttleStatus::kDCSStarted)
+ status->GetStatus() == AliShuttleStatus::kDCSStarted ||
+ status->GetStatus() == AliShuttleStatus::kFXSError)
increaseCount = kFALSE;
UpdateShuttleStatus(AliShuttleStatus::kStarted, increaseCount);
return cont;
}
+//______________________________________________________________________________________________
+void AliShuttle::SendMLRunInfo(const char* status)
+{
+	//
+	// Sends information about the current run to ML.
+	//
+	// Two text parameters are sent, using the current run number as ID:
+	//   SHUTTLE_status  - the status string passed in by the caller
+	//   SHUTTLE_runtype - the run type of the current logbook entry, followed by
+	//                     the "log" run parameter in parentheses when it is non-empty
+	//
+
+	TMonaLisaText mlStatus("SHUTTLE_status", status);
+
+	TString runType(fLogbookEntry->GetRunType());
+
+	// Look the parameter up once and check the pointer before reading through it:
+	// inspecting a null pointer (as strlen would) is undefined behaviour.
+	// NOTE(review): GetRunParameter presumably returns 0 for a missing key - confirm.
+	const char* logParameter = fLogbookEntry->GetRunParameter("log");
+	if (logParameter && logParameter[0] != '\0')
+	{
+		runType += "(";
+		runType += logParameter;
+		runType += ")";
+	}
+	TMonaLisaText mlRunType("SHUTTLE_runtype", runType);
+
+	TList mlList;
+	mlList.Add(&mlStatus);
+	mlList.Add(&mlRunType);
+
+	TString mlID;
+	mlID.Form("%d", GetCurrentRun());
+	fMonaLisa->SendParameters(&mlList, mlID);
+}
+
+//______________________________________________________________________________________________
+Int_t AliShuttle::GetMem(Int_t pid)
+{
+	// Invokes ps to get the memory consumption (the "vsize" column) of the process <pid>.
+	//
+	// Returns the integer parsed from the last line of the ps output, or -1 in case
+	// of error: the pipe could not be opened, ps printed nothing, or the output
+	// could not be parsed as a non-zero integer.
+
+	TString checkStr;
+	checkStr.Form("ps -o vsize --pid %d | tail -n 1", pid);
+	FILE* pipe = gSystem->OpenPipe(checkStr, "r");
+	if (!pipe)
+	{
+		Log("SHUTTLE", Form("GetMem - Error: "
+			"Could not open pipe to %s", checkStr.Data()));
+		return -1;
+	}
+
+	char buffer[100];
+	const Bool_t gotLine = (fgets(buffer, sizeof(buffer), pipe) != 0);
+	// the pipe is no longer needed once the single read has been attempted
+	gSystem->ClosePipe(pipe);
+	if (!gotLine)
+	{
+		Log("SHUTTLE", "GetMem - Error: ps did not return anything");
+		return -1;
+	}
+
+	Int_t mem = 0;
+	// a parsed value of 0 is treated like a parse failure
+	if ((sscanf(buffer, "%d\n", &mem) != 1) || !mem)
+	{
+		Log("SHUTTLE", "GetMem - Error: Could not parse output of ps");
+		return -1;
+	}
+
+	return mem;
+}
+
//______________________________________________________________________________________________
Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
{
Log("SHUTTLE", Form("\t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^*",
GetCurrentRun()));
+ CountOpenRuns();
+
// Send the information to ML
- TMonaLisaText mlStatus("SHUTTLE_status", "Processing");
- TMonaLisaText mlRunType("SHUTTLE_runtype", Form("%s (%s)", entry->GetRunType(), entry->GetRunParameter("log")));
-
- TList mlList;
- mlList.Add(&mlStatus);
- mlList.Add(&mlRunType);
-
- TString mlID;
- mlID.Form("%d", GetCurrentRun());
- fMonaLisa->SendParameters(&mlList, mlID);
+ SendMLRunInfo("Processing");
if (fLogbookEntry->IsDone())
{
return 0;
}
- if (fgkMainCDB.Length() == 0)
- fgkMainCDB = Form("alien://folder=/alice/data/%d/%s/OCDB?user=alidaq?cacheFold=/tmp/OCDBCache",
- GetCurrentYear(), lhcPeriod.Data());
+ // build cdb paths (repeat each time, LHCperiod might have changed)
+ fgkMainCDB.Form("alien://folder=%s%d/%s/OCDB?user=alidaq?cacheFold=/tmp/OCDBCache",
+ fConfig->GetAlienPath(), GetCurrentYear(), lhcPeriod.Data());
- if (fgkMainRefStorage.Length() == 0)
- fgkMainRefStorage = Form("alien://folder=/alice/data/%d/%s/Reference?user=alidaq?cacheFold=/tmp/OCDBCache",
- GetCurrentYear(), lhcPeriod.Data());
+ fgkMainRefStorage.Form("alien://folder=%s%d/%s/Reference?user=alidaq?cacheFold=/tmp/OCDBCache",
+ fConfig->GetAlienPath(), GetCurrentYear(), lhcPeriod.Data());
// Loop on detectors in the configuration
TIter iter(fConfig->GetDetectors());
{
fCurrentDetector = aDetector->String();
- if (ContinueProcessing() == kFALSE) continue;
+ if (ContinueProcessing() == kFALSE)
+ continue;
if (first)
{
GetCurrentRun(), aDetector->GetName()));
for(Int_t iSys=0;iSys<3;iSys++) fFXSCalled[iSys]=kFALSE;
+
+ Int_t initialMem = GetMem(getpid());
+ Log("SHUTTLE", Form("Memory consumption before forking is %d", initialMem));
Log(fCurrentDetector.Data(), "Process - Starting processing");
if (expiredTime > fConfig->GetPPTimeOut())
{
- TString tmp;
- tmp.Form("Process - Process of %s time out. "
- "Run time: %d seconds. Killing...",
- fCurrentDetector.Data(), expiredTime);
- Log("SHUTTLE", tmp);
- Log(fCurrentDetector, tmp);
+ TString logMsg;
+ AliShuttleStatus *currentStatus = ReadShuttleStatus();
+ AliShuttleStatus::Status newStatus = AliShuttleStatus::kInvalid;
+
+ if (currentStatus->GetStatus() <= AliShuttleStatus::kPPDone)
+ {
+ // in case pp not yet done set status to kPPTimeOut
+
+ logMsg.Form("Process - Process of %s timed out. Run time: %d seconds. Killing...",
+ fCurrentDetector.Data(), expiredTime);
+ newStatus = AliShuttleStatus::kPPTimeOut;
+ }
+ else if (currentStatus->GetStatus() == AliShuttleStatus::kStoreStarted)
+ {
+ // in case the pp goes in TimeOut while storing the objects in the OCDB
+ // set status to kStoreError
+
+ logMsg.Form("Process - Process of %s timed out while storing the OCDB object. Run time: %d seconds. Killing... and setting status to StoreError.",
+ fCurrentDetector.Data(), expiredTime);
+ newStatus = AliShuttleStatus::kStoreError;
+ }
+ else
+ {
+ // in other cases don't change the status
+
+ logMsg.Form("Process - Process of %s timed out in status = %s. Run time: %d seconds. Killing... without changing the status",
+ fCurrentDetector.Data(), currentStatus->GetStatusName(), expiredTime);
+ }
+
+ Log("SHUTTLE", logMsg);
+ Log(fCurrentDetector, logMsg);
kill(pid, 9);
- UpdateShuttleStatus(AliShuttleStatus::kPPTimeOut);
+ if (newStatus != AliShuttleStatus::kInvalid)
+ UpdateShuttleStatus(newStatus);
hasError = kTRUE;
gSystem->Sleep(1000);
{
gSystem->Sleep(1000);
- TString checkStr;
- checkStr.Form("ps -o vsize --pid %d | tail -n 1", pid);
- FILE* pipe = gSystem->OpenPipe(checkStr, "r");
- if (!pipe)
- {
- Log("SHUTTLE", Form("Process - Error: "
- "Could not open pipe to %s", checkStr.Data()));
+ Int_t mem = GetMem(pid);
+
+ if (mem < 0)
continue;
- }
- char buffer[100];
- if (!fgets(buffer, 100, pipe))
- {
- Log("SHUTTLE", "Process - Error: ps did not return anything");
- gSystem->ClosePipe(pipe);
- continue;
- }
- gSystem->ClosePipe(pipe);
-
- //Log("SHUTTLE", Form("ps returned %s", buffer));
-
- Int_t mem = 0;
- if ((sscanf(buffer, "%d\n", &mem) != 1) || !mem)
- {
- Log("SHUTTLE", "Process - Error: Could not parse output of ps");
- continue;
- }
+ mem -= initialMem;
+ if (mem < 0)
+ mem = 0;
if (expiredTime % 60 == 0)
{
}
else if (pid == 0)
{
- // client
- Log("SHUTTLE", Form("Process - In client process of %d - %s", GetCurrentRun(),
+ // child
+ Log("SHUTTLE", Form("Process - In child process of %d - %s", GetCurrentRun(),
aDetector->GetName()));
Log("SHUTTLE", Form("Process - Redirecting output to %s log",fCurrentDetector.Data()));
gSystem->Exit(1);
}
- Bool_t success = ProcessCurrentDetector();
-
+ Int_t success = ProcessCurrentDetector();
+
gSystem->ChangeDirectory(wd.Data());
- if (success) // Preprocessor finished successfully!
+ if (success == 1) // Preprocessor finished successfully!
{
// remove temporary folder or DCS map
if (!fConfig->KeepTempFolder())
fCurrentDetector.Data()));
// Transfer the data from local storage to main storage (Grid)
- UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
if (StoreOCDB() == kFALSE)
- {
- Log("SHUTTLE",
- Form("\t\t\t****** run %d - %s: STORAGE ERROR ******",
- GetCurrentRun(), aDetector->GetName()));
- UpdateShuttleStatus(AliShuttleStatus::kStoreError);
success = kFALSE;
- } else {
- Log("SHUTTLE",
- Form("\t\t\t****** run %d - %s: DONE ******",
- GetCurrentRun(), aDetector->GetName()));
- UpdateShuttleStatus(AliShuttleStatus::kDone);
- UpdateShuttleLogbook(fCurrentDetector, "DONE");
- }
- } else
+ }
+ else if (success == 0)
{
Log("SHUTTLE",
Form("\t\t\t****** run %d - %s: PP ERROR ******",
fFirstUnprocessed[iDet] = kFALSE;
}
}
+ SendMLRunInfo("Pending");
}
}
}
//______________________________________________________________________________________________
-Bool_t AliShuttle::ProcessCurrentDetector()
+Int_t AliShuttle::ProcessCurrentDetector()
{
//
// Makes data retrieval just for a specific detector (fCurrentDetector).
TString wd = gSystem->WorkingDirectory();
if (!CleanReferenceStorage(fCurrentDetector.Data()))
- return kFALSE;
+ return 0;
gSystem->ChangeDirectory(wd.Data());
- TMap* dcsMap = new TMap();
-
// call preprocessor
AliPreprocessor* aPreprocessor =
dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
+ // check if the preprocessor wants to process this run type
+ if (aPreprocessor->ProcessRunType() == kFALSE)
+ {
+ UpdateShuttleStatus(AliShuttleStatus::kSkipped);
+ UpdateShuttleLogbook(fCurrentDetector, "DONE");
+ Log(fCurrentDetector, Form("ProcessCurrentDetector - %s preprocessor is not interested in this run type", fCurrentDetector.Data()));
+
+ return 2;
+ }
+
+ TMap* dcsMap = new TMap();
+
aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
Bool_t processDCS = aPreprocessor->ProcessDCS();
UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
UpdateShuttleStatus(AliShuttleStatus::kDCSError);
delete dcsMap;
- return kFALSE;
+ return 0;
} else {
UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
TMap* aliasMap = 0;
TMap* dpMap = 0;
-
+
if (fConfig->GetDCSAliases(fCurrentDetector, iServ)->GetEntries() > 0)
{
+ Log(fCurrentDetector, Form("Querying %d DCS aliases", fConfig->GetDCSAliases(fCurrentDetector, iServ)->GetEntries()));
aliasMap = GetValueSet(host, port,
fConfig->GetDCSAliases(fCurrentDetector, iServ),
kAlias, multiSplit);
" Sending mail to DCS experts!", host.Data()));
UpdateShuttleStatus(AliShuttleStatus::kDCSError);
- if (!SendMailToDCS())
+ if (!SendMail(kDCSEMail))
Log("SHUTTLE", Form("ProcessCurrentDetector - "
- "Could not send mail to DCS experts!"));
+ "Could not send mail to DCS experts!"));
delete dcsMap;
- return kFALSE;
+ return 0;
}
}
if (fConfig->GetDCSDataPoints(fCurrentDetector, iServ)->GetEntries() > 0)
{
+ Log(fCurrentDetector, Form("Querying %d DCS data points", fConfig->GetDCSDataPoints(fCurrentDetector, iServ)->GetEntries()));
dpMap = GetValueSet(host, port,
fConfig->GetDCSDataPoints(fCurrentDetector, iServ),
kDP, multiSplit);
" Sending mail to DCS experts!", host.Data()));
UpdateShuttleStatus(AliShuttleStatus::kDCSError);
- if (!SendMailToDCS())
+ if (!SendMail(kDCSEMail))
Log("SHUTTLE", Form("ProcessCurrentDetector - "
- "Could not send mail to DCS experts!"));
+ "Could not send mail to DCS experts!"));
if (aliasMap) delete aliasMap;
delete dcsMap;
- return kFALSE;
+ return 0;
}
}
// DCS Archive DB processing successful. Call Preprocessor!
UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
+ fFXSError = -1; // reset to "no error"; holds the index of the failing system after ::Process if an FXS error occurred
+
UInt_t returnValue = aPreprocessor->Process(dcsMap);
+
+ if (fFXSError!=-1) {
+ UpdateShuttleStatus(AliShuttleStatus::kFXSError);
+ SendMail(kFXSEMail, fFXSError);
+ dcsMap->DeleteAll();
+ delete dcsMap;
+ return 0;
+ }
if (returnValue > 0) // Preprocessor error!
{
UpdateShuttleStatus(AliShuttleStatus::kPPError);
dcsMap->DeleteAll();
delete dcsMap;
- return kFALSE;
+ return 0;
}
// preprocessor ok!
dcsMap->DeleteAll();
delete dcsMap;
- return kTRUE;
+ return 1;
}
//______________________________________________________________________________________________
for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
+ delete aRow;
+ delete aResult;
+
UInt_t startTime = entry->GetStartTime();
UInt_t endTime = entry->GetEndTime();
-
-// if (!startTime || !endTime || startTime > endTime)
-// {
-// Log("SHUTTLE",
-// Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d. Skipping!",
-// run, startTime, endTime));
-//
-// Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
-// fLogbookEntry = entry;
-// if (!UpdateShuttleLogbook("shuttle_done"))
-// {
-// AliError(Form("Could not update logbook for run %d !", run));
-// }
-// fLogbookEntry = 0;
-//
-// delete entry;
-// delete aRow;
-// delete aResult;
-// return 0;
-// }
-
- if (!startTime)
+ Bool_t ecsSuccess = entry->GetECSSuccess();
+
+ TString totEventsStr = entry->GetRunParameter("totalEvents");
+ Int_t totEvents = totEventsStr.Atoi();
+
+ UInt_t now = time(0);
+ // TODO make this a configuration parameter
+ Int_t dcsDelay = 120;
+
+ // runs are accepted if they have ecsSuccess set or more than 1 event
+ if (startTime != 0 && endTime != 0 && endTime > startTime && (totEvents > 1 || ecsSuccess) && (endTime < now - dcsDelay))
{
- Log("SHUTTLE",
- Form("QueryRunParameters - Invalid parameters for Run %d: "
- "startTime = %d, endTime = %d. Skipping!",
- run, startTime, endTime));
-
- Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
- fLogbookEntry = entry;
- if (!UpdateShuttleLogbook("shuttle_ignored"))
- {
- AliError(Form("Could not update logbook for run %d !", run));
- }
- fLogbookEntry = 0;
+ if (ecsSuccess == kFALSE)
+ Log("SHUTTLE", Form("Processing run %d although in status ECS failure, Reason: %s", run, entry->GetRunParameter("eor_reason")));
+ return entry;
+ }
+
+ Bool_t skip = kFALSE;
- delete entry;
- delete aRow;
- delete aResult;
- return 0;
+ if (endTime != 0 && endTime >= now - dcsDelay)
+ {
+ Log("SHUTTLE", Form("Skipping run %d for now, because DCS buffer time is not yet expired", run));
}
-
- if (startTime && !endTime)
+ else if (totEvents <= 1)
{
- // TODO Here we don't mark SHUTTLE done, because this may mean
- //the run is still ongoing!!
- Log("SHUTTLE",
- Form("QueryRunParameters - Invalid parameters for Run %d: "
- "startTime = %d, endTime = %d. Skipping (Shuttle won't be marked as DONE)!",
- run, startTime, endTime));
-
- //Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
- //fLogbookEntry = entry;
- //if (!UpdateShuttleLogbook("shuttle_done"))
- //{
- // AliError(Form("Could not update logbook for run %d !", run));
- //}
- //fLogbookEntry = 0;
-
- delete entry;
- delete aRow;
- delete aResult;
- return 0;
+ Log("SHUTTLE", Form("QueryRunParameters - Run %d has 1 event or less - Skipping!", run));
+ skip = kTRUE;
}
-
- if (startTime && endTime && (startTime > endTime))
+ else
{
- Log("SHUTTLE",
- Form("QueryRunParameters - Invalid parameters for Run %d: "
- "startTime = %d, endTime = %d. Skipping!",
- run, startTime, endTime));
-
- Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
- fLogbookEntry = entry;
- if (!UpdateShuttleLogbook("shuttle_ignored"))
- {
- AliError(Form("Could not update logbook for run %d !", run));
- }
- fLogbookEntry = 0;
-
- delete entry;
- delete aRow;
- delete aResult;
- return 0;
+ Log("SHUTTLE", Form("QueryRunParameters - Invalid parameters for Run %d: "
+ "startTime = %d, endTime = %d. Skipping (Shuttle won't be marked as DONE)!",
+ run, startTime, endTime));
}
-
- TString totEventsStr = entry->GetRunParameter("totalEvents");
- Int_t totEvents = totEventsStr.Atoi();
- if (totEvents < 1)
+
+ if (skip)
{
- Log("SHUTTLE",
- Form("QueryRunParameters - Run %d has 0 events - Skipping!", run));
-
- Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
- fLogbookEntry = entry;
- if (!UpdateShuttleLogbook("shuttle_ignored"))
+ Log("SHUTTLE", Form("Marking SHUTTLE skipped for run %d", run));
+ fLogbookEntry = entry;
+ if (!UpdateShuttleLogbook("shuttle_skipped"))
{
AliError(Form("Could not update logbook for run %d !", run));
}
fLogbookEntry = 0;
-
- delete entry;
- delete aRow;
- delete aResult;
- return 0;
}
-
- delete aRow;
- delete aResult;
-
- return entry;
+
+ delete entry;
+ return 0;
}
//______________________________________________________________________________________________
if (!Connect(system))
{
Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
+ fFXSError = system;
return 0;
}
else if (system == kHLT)
{
whereClause += Form(" and DDLnumbers=\"%s\"", source);
- nFields = 3;
}
TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
TSQLResult* aResult = 0;
aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
if (!aResult) {
- Log(detector, Form("GetFileName - Can't execute SQL query to %s database for: id = %s, source = %s",
+ Log(detector, Form("GetFile - Can't execute SQL query to %s database for: id = %s, source = %s",
GetSystemName(system), id, sourceName.Data()));
+ fFXSError = system;
return 0;
}
- if(aResult->GetRowCount() == 0)
+ if (aResult->GetRowCount() == 0)
{
Log(detector,
- Form("GetFileName - No entry in %s FXS db for: id = %s, source = %s",
+ Form("GetFile - No entry in %s FXS db for: id = %s, source = %s",
GetSystemName(system), id, sourceName.Data()));
delete aResult;
return 0;
if (aResult->GetRowCount() > 1) {
Log(detector,
- Form("GetFileName - More than one entry in %s FXS db for: id = %s, source = %s",
+ Form("GetFile - More than one entry in %s FXS db for: id = %s, source = %s",
GetSystemName(system), id, sourceName.Data()));
+ fFXSError = system;
delete aResult;
return 0;
}
Log(detector,
Form("GetFileName - Wrong field count in %s FXS db for: id = %s, source = %s",
GetSystemName(system), id, sourceName.Data()));
+ fFXSError = system;
delete aResult;
return 0;
}
TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
if (!aRow){
- Log(detector, Form("GetFileName - Empty set result in %s FXS db from query: id = %s, source = %s",
+ Log(detector, Form("GetFile - Empty set result in %s FXS db from query: id = %s, source = %s",
GetSystemName(system), id, sourceName.Data()));
+ fFXSError = system;
delete aResult;
return 0;
}
Bool_t result = kFALSE;
// copy!! if successful TSystem::Exec returns 0
- while(nRetries++ < maxRetries) {
+ while (nRetries++ < maxRetries) {
AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
result = RetrieveFile(system, filePath.Data(), localFileName.Data());
- if(!result)
+ if (!result)
{
- Log(detector, Form("GetFileName - Copy of file %s from %s FXS failed",
+ Log(detector, Form("GetFile - Copy of file %s from %s FXS failed",
filePath.Data(), GetSystemName(system)));
continue;
}
if (fileSize.Length()>0)
{
// compare filesize of local file with the one stored in the FXS DB
- TString command=("stat --format=%s");
- Int_t sizeComp = gSystem->Exec(Form("%s %s |grep %s 2>&1 > /dev/null",
- command.Data(), localFileName.Data(),fileSize.Data()));
+ Long_t size = -1;
+ Int_t sizeComp = gSystem->GetPathInfo(localFileName.Data(), 0, &size, 0, 0);
- if ( sizeComp != 0)
+ if (sizeComp != 0 || size != fileSize.Atoi())
{
- Log(detector, Form("GetFileName - size of file %s does not match with local copy!",
+ Log(detector, Form("GetFile - size of file %s does not match with local copy!",
filePath.Data()));
result = kFALSE;
continue;
if (fileChecksum.Length()>0)
{
// compare md5sum of local file with the one stored in the FXS DB
- Int_t md5Comp = gSystem->Exec(Form("md5sum %s |grep %s 2>&1 > /dev/null",
+ if(fileChecksum.Contains(' ')) fileChecksum.Resize(fileChecksum.First(' '));
+ Int_t md5Comp = gSystem->Exec(Form("md5sum %s |grep %s > /dev/null 2> /dev/null",
localFileName.Data(), fileChecksum.Data()));
if (md5Comp != 0)
{
- Log(detector, Form("GetFileName - md5sum of file %s does not match with local copy!",
+ Log(detector, Form("GetFile - md5sum of file %s does not match with local copy!",
filePath.Data()));
result = kFALSE;
continue;
if (result) break;
}
- if(!result) return 0;
+ if (!result)
+ {
+ fFXSError = system;
+ return 0;
+ }
fFXSCalled[system]=kTRUE;
TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, sourceName.Data()));
}
}
- TString baseFXSFolder;
- if (system == kDAQ)
- {
- baseFXSFolder = "FES/";
- }
- else if (system == kDCS)
- {
- baseFXSFolder = "";
- }
- else if (system == kHLT)
- {
- baseFXSFolder = "/opt/FXS/";
- }
-
-
- TString command = Form("scp -oPort=%d -2 %s@%s:%s%s %s",
+ TString command = Form("scp -oPort=%d -2 %s@%s:%s/%s %s",
fConfig->GetFXSPort(system),
fConfig->GetFXSUser(system),
fConfig->GetFXSHost(system),
- baseFXSFolder.Data(),
+ fConfig->GetFXSBaseFolder(system),
fxsFileName,
localFileName);
if (!Connect(system))
{
Log(detector, Form("GetFileSources - Couldn't connect to %s FXS database", GetSystemName(system)));
+ fFXSError = system;
return NULL;
}
if (!aResult) {
Log(detector, Form("GetFileSources - Can't execute SQL query to %s database for id: %s",
GetSystemName(system), id));
+ fFXSError = system;
return 0;
}
TString detName(detector);
TString setClause;
- if (detName == "shuttle_done" || detName == "shuttle_ignored")
+ if (detName == "shuttle_done" || detName == "shuttle_skipped")
{
setClause = "set shuttle_done=1";
-
+
if (detName == "shuttle_done")
{
- // Send the information to ML
- TMonaLisaText mlStatus("SHUTTLE_status", "Done");
-
- TList mlList;
- mlList.Add(&mlStatus);
-
- TString mlID;
- mlID.Form("%d", GetCurrentRun());
- fMonaLisa->SendParameters(&mlList, mlID);
+ if (TouchFile() != kTRUE)
+ {
+ SendMLRunInfo("Pending");
+ return kFALSE;
+ }
+
+ SendMLRunInfo("Done");
}
- } else {
+ else
+ SendMLRunInfo("Skipped");
+ }
+ else {
TString statusStr(status);
if(statusStr.Contains("done", TString::kIgnoreCase) ||
statusStr.Contains("failed", TString::kIgnoreCase)){
gSystem->FreeDirectory(dir);
}
- TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
+ TString toLog = Form("%s UTC (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
if (GetCurrentRun() >= 0)
toLog += Form("run %d - ", GetCurrentRun());
toLog += Form("%s", message);
{
// query Shuttle logbook for earlier runs, check if some detectors are unprocessed,
// flag them into fFirstUnprocessed array
- TString whereClause(Form("where shuttle_done=0 and run < %d", run));
+ TString whereClauseBis(Form("where shuttle_done=0 and run < %d", run));
TObjArray tmpLogbookEntries;
- if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries))
+ if (!QueryShuttleLogbook(whereClauseBis, tmpLogbookEntries))
{
Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
return kFALSE;
}
//______________________________________________________________________________________________
-Bool_t AliShuttle::SendMail()
+Bool_t AliShuttle::SendMail(EMailTarget target, Int_t system)
{
//
// sends a mail to the subdetector expert in case of preprocessor error
if (fTestMode != kNone)
return kTRUE;
- if (!fConfig->SendMail()) return kTRUE;
-
- TString to="";
- TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
- TObjString *anExpert=0;
- while ((anExpert = (TObjString*) iterExperts.Next()))
- {
- to += Form("%s,", anExpert->GetName());
- }
- if (to.Length() > 0)
- to.Remove(to.Length()-1);
- AliDebug(2, Form("to: %s",to.Data()));
+ if (!fConfig->SendMail())
+ return kTRUE;
- if (to.IsNull()) {
- Log("SHUTTLE", "List of detector responsibles not set!");
- return kFALSE;
+ if (target == kDCSEMail || target == kFXSEMail) {
+ if (!fFirstProcessing)
+ return kTRUE;
}
+ Int_t runMode = (Int_t)fConfig->GetRunMode();
+ TString tmpStr;
+ if (runMode == 0) tmpStr = " Nightly Test:";
+ else tmpStr = " Data Taking:";
void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
if (dir == NULL)
{
gSystem->FreeDirectory(dir);
}
- TString bodyFileName;
- bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
- gSystem->ExpandPathName(bodyFileName);
-
- ofstream mailBody;
- mailBody.open(bodyFileName, ofstream::out);
-
- if (!mailBody.is_open())
- {
- Log("SHUTTLE", Form("Could not open mail body file %s", bodyFileName.Data()));
- return kFALSE;
- }
-
- TString cc="";
- TIter iterAdmins(fConfig->GetAdmins(AliShuttleConfig::kGlobal));
- TObjString *anAdmin=0;
- while ((anAdmin = (TObjString*) iterAdmins.Next()))
- {
- cc += Form("%s,", anAdmin->GetName());
- }
- if (cc.Length() > 0)
- cc.Remove(to.Length()-1);
- AliDebug(2, Form("cc: %s",to.Data()));
-
- TString subject = Form("%s Shuttle preprocessor FAILED in run %d (run type = %s)!",
- fCurrentDetector.Data(), GetCurrentRun(), GetRunType());
- AliDebug(2, Form("subject: %s", subject.Data()));
-
- TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
- body += Form("SHUTTLE just detected that your preprocessor "
- "failed processing run %d (run type = %s)!!\n\n",
- GetCurrentRun(), GetRunType());
- body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n",
- fCurrentDetector.Data());
- if (fConfig->GetRunMode() == AliShuttleConfig::kTest)
- {
- body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
- } else {
- body += Form("\thttp://pcalimonitor.cern.ch/shuttle.jsp?instance=PROD&time=168 \n\n");
+ // det experts in to
+ TString to="";
+ TIter *iterExperts = 0;
+ if (target == kDCSEMail) {
+ iterExperts = new TIter(fConfig->GetAdmins(AliShuttleConfig::kAmanda));
}
-
-
- TString logFolder = "logs";
- if (fConfig->GetRunMode() == AliShuttleConfig::kProd)
- logFolder += "_PROD";
-
-
- body += Form("Find the %s log for the current run on \n\n"
- "\thttp://pcalishuttle01.cern.ch:8880/%s/%d/%s_%d.log \n\n",
- fCurrentDetector.Data(), logFolder.Data(), GetCurrentRun(),
- fCurrentDetector.Data(), GetCurrentRun());
- body += Form("The last 10 lines of %s log file are following:\n\n", fCurrentDetector.Data());
-
- AliDebug(2, Form("Body begin: %s", body.Data()));
-
- mailBody << body.Data();
- mailBody.close();
- mailBody.open(bodyFileName, ofstream::out | ofstream::app);
-
- TString logFileName = Form("%s/%d/%s_%d.log", GetShuttleLogDir(),
- GetCurrentRun(), fCurrentDetector.Data(), GetCurrentRun());
- TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
- if (gSystem->Exec(tailCommand.Data()))
- {
- mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
+ else if (target == kFXSEMail) {
+ iterExperts = new TIter(fConfig->GetAdmins(system));
}
-
- TString endBody = Form("------------------------------------------------------\n\n");
- endBody += Form("In case of problems please contact the SHUTTLE core team.\n\n");
- endBody += "Please do not answer this message directly, it is automatically generated.\n\n";
- endBody += "Greetings,\n\n \t\t\tthe SHUTTLE\n";
-
- AliDebug(2, Form("Body end: %s", endBody.Data()));
-
- mailBody << endBody.Data();
-
- mailBody.close();
-
- // send mail!
- TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
- subject.Data(),
- cc.Data(),
- to.Data(),
- bodyFileName.Data());
- AliDebug(2, Form("mail command: %s", mailCommand.Data()));
-
- Bool_t result = gSystem->Exec(mailCommand.Data());
-
- return result == 0;
-}
-
-//______________________________________________________________________________________________
-Bool_t AliShuttle::SendMailToDCS()
-{
- //
- // sends a mail to the DCS Amanda experts in case of DCS data point retrieval error
- //
-
- if (fTestMode != kNone)
- return kTRUE;
-
- if (!fConfig->SendMail()) return kTRUE;
-
- void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
- if (dir == NULL)
- {
- if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
+ if (iterExperts) {
+ TObjString *anExpert=0;
+ while ((anExpert = (TObjString*) iterExperts->Next()))
{
- Log("SHUTTLE", Form("SendMailToDCS - Can't open directory <%s>", GetShuttleLogDir()));
- return kFALSE;
+ to += Form("%s,", anExpert->GetName());
}
-
- } else {
- gSystem->FreeDirectory(dir);
+ delete iterExperts;
}
- TString bodyFileName;
- bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
- gSystem->ExpandPathName(bodyFileName);
-
- ofstream mailBody;
- mailBody.open(bodyFileName, ofstream::out);
-
- if (!mailBody.is_open())
- {
- Log("SHUTTLE", Form("SendMailToDCS - Could not open mail body file %s", bodyFileName.Data()));
- return kFALSE;
- }
-
- TString to="";
- TIter iterExperts(fConfig->GetAdmins(AliShuttleConfig::kAmanda));
+ // add subdetector experts
+ iterExperts = new TIter(fConfig->GetResponsibles(fCurrentDetector));
TObjString *anExpert=0;
- while ((anExpert = (TObjString*) iterExperts.Next()))
+ while ((anExpert = (TObjString*) iterExperts->Next()))
{
to += Form("%s,", anExpert->GetName());
}
+ delete iterExperts;
+
if (to.Length() > 0)
to.Remove(to.Length()-1);
AliDebug(2, Form("to: %s",to.Data()));
if (to.IsNull()) {
- Log("SHUTTLE", "List of Amanda server administrators not set!");
+ Log("SHUTTLE", Form("List of %d responsibles not set!", (Int_t) target));
return kFALSE;
}
+ // SHUTTLE responsibles in cc
TString cc="";
TIter iterAdmins(fConfig->GetAdmins(AliShuttleConfig::kGlobal));
TObjString *anAdmin=0;
cc += Form("%s,", anAdmin->GetName());
}
if (cc.Length() > 0)
- cc.Remove(to.Length()-1);
+ cc.Remove(cc.Length()-1);
AliDebug(2, Form("cc: %s",to.Data()));
- TString subject = Form("Retrieval of data points for %s FAILED in run %d !",
- fCurrentDetector.Data(), GetCurrentRun());
- AliDebug(2, Form("subject: %s", subject.Data()));
+ // mail body
+ TString bodyFileName;
+ bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
+ gSystem->ExpandPathName(bodyFileName);
+
+ ofstream mailBody;
+ mailBody.open(bodyFileName, ofstream::out);
+
+ if (!mailBody.is_open())
+ {
+ Log("SHUTTLE", Form("Could not open mail body file %s", bodyFileName.Data()));
+ return kFALSE;
+ }
+
+
+ TString subject;
+ TString body;
+
+ if (target == kDCSEMail){
+ subject = Form("%s CRITICAL Retrieval of data points for %s FAILED in run %d !",
+ tmpStr.Data(), fCurrentDetector.Data(), GetCurrentRun());
+ AliDebug(2, Form("subject: %s", subject.Data()));
+
+ body = Form("Dear DCS experts, \n\n");
+ body += Form("SHUTTLE couldn\'t retrieve the data points for detector %s "
+ "in run %d!!\n\n", fCurrentDetector.Data(), GetCurrentRun());
+ }
+ else if (target == kFXSEMail){
+ subject = Form("%s CRITICAL FXS communication for %s FAILED in run %d !",
+ tmpStr.Data(), fCurrentDetector.Data(), GetCurrentRun());
+ AliDebug(2, Form("subject: %s", subject.Data()));
+ TString sys;
+ if (system == kDAQ) sys="DAQ";
+ else if (system == kDCS) sys="DCS";
+ else if (system == kHLT) sys="HLT";
+ else return kFALSE;
+ body = Form("Dear %s FXS experts, \n\n",sys.Data());
+ body += Form("SHUTTLE couldn\'t retrieve data from the FXS for detector %s "
+ "in run %d!!\n\n", fCurrentDetector.Data(), GetCurrentRun());
+ }
+ else {
+ subject = Form("%s %s Shuttle preprocessor FAILED in run %d (run type = %s)!",
+ tmpStr.Data(), fCurrentDetector.Data(), GetCurrentRun(), GetRunType());
+ AliDebug(2, Form("subject: %s", subject.Data()));
+
+ body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
+ body += Form("SHUTTLE just detected that your preprocessor "
+ "failed processing run %d (run type = %s)!!\n\n",
+ GetCurrentRun(), GetRunType());
+ }
- TString body = Form("Dear DCS experts, \n\n");
- body += Form("SHUTTLE couldn\'t retrieve the data points for detector %s "
- "in run %d!!\n\n", fCurrentDetector.Data(), GetCurrentRun());
body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n",
fCurrentDetector.Data());
if (fConfig->GetRunMode() == AliShuttleConfig::kTest)
{
- body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
+ body += Form("\thttp://pcalimonitor.cern.ch/shuttle.jsp?time=24 \n\n");
} else {
- body += Form("\thttp://pcalimonitor.cern.ch/shuttle.jsp?instance=PROD?time=168 \n\n");
+ body += Form("\thttp://pcalimonitor.cern.ch/shuttle.jsp?instance=PROD&time=24 \n\n");
}
-
+
+
TString logFolder = "logs";
if (fConfig->GetRunMode() == AliShuttleConfig::kProd)
logFolder += "_PROD";
mailBody.close();
mailBody.open(bodyFileName, ofstream::out | ofstream::app);
- TString logFileName = Form("%s/%d/%s_%d.log", GetShuttleLogDir(), GetCurrentRun(),
- fCurrentDetector.Data(), GetCurrentRun());
+ TString logFileName = Form("%s/%d/%s_%d.log", GetShuttleLogDir(),
+ GetCurrentRun(), fCurrentDetector.Data(), GetCurrentRun());
TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
if (gSystem->Exec(tailCommand.Data()))
{
Bool_t AliShuttle::GetHLTStatus()
{
// Return HLT status (ON=1 OFF=0)
- // Converts the HLT status from the status string read in the run logbook (not just a bool)
+ // Converts the HLT status from the mode string read in the run logbook (not just a bool)
+ // The decision is taken on the first character of run parameter "HLTmode":
+ // "A" -> kFALSE; "B", "C", "D" or "E" -> kTRUE; anything else (including an
+ // empty value) is logged as unexpected and -> kFALSE.
+ // Returns 0 (after AliError) when no logbook entry is set.
if(!fLogbookEntry) {
AliError("No logbook entry!");
return 0;
}
- // TODO implement when HLTStatus is inserted in run logbook
- //TString hltStatus = fLogbookEntry->GetRunParameter("HLTStatus");
- //if(hltStatus == "OFF") {return kFALSE};
+ // TODO implement when HLTMode is inserted in run logbook
+ TString hltMode = fLogbookEntry->GetRunParameter("HLTmode");
+ // Copy the first character into its own TString: TSubString::Data() points into
+ // hltMode and is not terminated at the substring end, so printing it with %s
+ // would show the whole mode string (and is not valid for an empty mode).
+ TString firstChar(hltMode(0,1));
+ AliDebug(2,Form("First char = %s ",firstChar.Data()));
+ if (firstChar == "A") {
+ return kFALSE;
+ }
+ else if ((firstChar == "B") || (firstChar == "C") || (firstChar == "D") || (firstChar == "E")) {
+ return kTRUE;
+ }
+ else {
+ Log("SHUTTLE","Unexpected HLT mode! Returning 0....");
+ return kFALSE;
+ }
+}
- return kTRUE;
+//______________________________________________________________________________________________
+const char* AliShuttle::GetTriggerConfiguration()
+{
+ //
+ // Receives the trigger configuration from the DAQ logbook for the current run
+ // (column configFile of table logbook_trigger_config).
+ //
+ // Returns a pointer to the configuration string, or 0 when the connection
+ // fails, the query cannot be executed, no row exists for the run, or the
+ // row cannot be fetched (each query/row failure is logged).
+ // The returned pointer refers to a function-static buffer that is
+ // overwritten by the next successful call; copy it if it has to be kept.
+ //
+
+ // check connection, if needed reconnect (server slot 3 is the one queried below)
+ if (!Connect(3))
+ return 0;
+
+ TString sqlQuery;
+ sqlQuery.Form("SELECT configFile FROM logbook_trigger_config WHERE run = %d", GetCurrentRun());
+ TSQLResult* result = fServer[3]->Query(sqlQuery);
+ if (!result)
+ {
+ Log("SHUTTLE", Form("ERROR: Can't execute query <%s>!", sqlQuery.Data()));
+ return 0;
+ }
+
+ if (result->GetRowCount() == 0)
+ {
+ Log("SHUTTLE", "ERROR: Trigger configuration not found in logbook_trigger_config");
+ delete result;
+ return 0;
+ }
+
+ TSQLRow* row = result->Next();
+ if (!row)
+ {
+ Log("SHUTTLE", "ERROR: Could not receive logbook_trigger_config data");
+ delete result;
+ return 0;
+ }
+
+ // static, so that pointer remains valid when it is returned to the calling class.
+ // The value is assigned on every call: initialising the static in its declaration
+ // runs only once, which would keep returning the configuration of the first run.
+ static TString triggerConfig;
+ triggerConfig = row->GetField(0);
+
+ // the field has been copied, so row and result can be released
+ delete row;
+ row = 0;
+
+ delete result;
+ result = 0;
+
+ Log("SHUTTLE", Form("Found trigger configuration: %s", triggerConfig.Data()));
+
+ return triggerConfig;
}
//______________________________________________________________________________________________
fgkShuttleLogDir = gSystem->ExpandPathName(logDir);
}
+//______________________________________________________________________________________________
+Bool_t AliShuttle::TouchFile()
+{
+ //
+ // Marks the current run as DONE on the grid by touching the entry
+ // <alien path><year>/<LHC period>/SHUTTLE_DONE/<run> through gGrid.
+ // The SHUTTLE_DONE directory is created first when the "ls" check below
+ // does not find it.
+ // Returns kTRUE only if the touch command reports "__result__" == "1";
+ // every failure is logged and returns kFALSE.
+ //
+
+ // nothing can be done without a grid connection
+ if (!gGrid)
+ {
+ Log("SHUTTLE",Form("No TGrid connection estabilished!"));
+ Log("SHUTTLE",Form("Could not touch file for run %i",GetCurrentRun()));
+ return kFALSE;
+ }
+
+ TString dir;
+ dir.Form("%s%d/%s/SHUTTLE_DONE", fConfig->GetAlienPath(), GetCurrentYear(), GetLHCPeriod());
+ // checking whether directory for touch command exists
+ TString commandLs;
+ commandLs.Form("ls %s",dir.Data());
+ TGridResult *resultLs = dynamic_cast<TGridResult*>(gGrid->Command(commandLs));
+ if (!resultLs){
+ Log("SHUTTLE",Form("No result for %s command, returning without touching",commandLs.Data()));
+ return kFALSE;
+ }
+ // only the first entry of the ls result is inspected
+ TMap *mapLs = dynamic_cast<TMap*>(resultLs->At(0));
+ if (!mapLs){
+ Log("SHUTTLE",Form("No map for %s command, returning without touching",commandLs.Data()));
+ delete resultLs;
+ resultLs = 0x0;
+ return kFALSE;
+ }
+ TObjString *valueLsPath = dynamic_cast<TObjString*>(mapLs->GetValue("path"));
+ // NOTE(review): by C++ precedence the (TString) cast below applies to the value
+ // returned by CompareTo(), not to GetString(), and the test is "!= 1" whereas an
+ // equality test would be "!= 0" - confirm that this is the intended condition.
+ if (!valueLsPath || (TString)(valueLsPath->GetString()).CompareTo(dir)!=1){
+ Log("SHUTTLE",Form("No directory %s found, creating it",dir.Data()));
+
+ // creating the directory
+
+ Bool_t boolMkdir = gGrid->Mkdir(dir.Data());
+ if (!boolMkdir) {
+ Log("SHUTTLE",Form("Impossible to create dir %s in alien catalogue for run %i!",dir.Data(),GetCurrentRun()));
+ delete resultLs;
+ resultLs = 0x0;
+ return kFALSE;
+ }
+ Log("SHUTTLE",Form("Directory %s successfully created in alien catalogue for run %i",dir.Data(),GetCurrentRun()));
+ }
+ else {
+ Log("SHUTTLE",Form("Directory %s correctly found for run %i",dir.Data(),GetCurrentRun()));
+ }
+
+ // the ls result is no longer needed
+ delete resultLs;
+ resultLs = 0x0;
+
+ // touch <dir>/<run> and check the outcome reported in the first result entry
+ TString command;
+ command.Form("touch %s/%i", dir.Data(), GetCurrentRun());
+ Log("SHUTTLE", Form("Creating entry in file catalog: %s", command.Data()));
+ TGridResult *resultTouch = dynamic_cast<TGridResult*>(gGrid->Command(command));
+ if (!resultTouch){
+ Log("SHUTTLE",Form("No result for touching command, returning without touching for run %i",GetCurrentRun()));
+ return kFALSE;
+ }
+ TMap *mapTouch = dynamic_cast<TMap*>(resultTouch->At(0));
+ if (!mapTouch){
+ Log("SHUTTLE",Form("No map for touching command, returning without touching for run %i",GetCurrentRun()));
+ delete resultTouch;
+ resultTouch = 0x0;
+ return kFALSE;
+ }
+ TObjString *valueTouch = dynamic_cast<TObjString*>(mapTouch->GetValue("__result__"));
+ if (!valueTouch){
+ Log("SHUTTLE",Form("No value for \"__result__\" key set in the map for touching command, returning without touching for run %i",GetCurrentRun()));
+ delete resultTouch;
+ resultTouch = 0x0;
+ return kFALSE;
+ }
+ // any "__result__" value other than "1" is treated as a failed touch
+ if (valueTouch->GetString()!="1"){
+ Log("SHUTTLE",Form("Failing the touching command, returning without touching for run %i",GetCurrentRun()));
+ delete resultTouch;
+ resultTouch = 0x0;
+ return kFALSE;
+ }
+ delete resultTouch;
+ resultTouch = 0x0;
+ Log("SHUTTLE", "Sucessfully touched the file");
+ return kTRUE;
+}
+
+