/*
$Log$
+Revision 1.38 2007/04/12 08:26:18 jgrosseo
+updated comment
+
+Revision 1.37 2007/04/10 16:53:14 jgrosseo
+redirecting sub detector stdout, stderr to sub detector log file
+
+Revision 1.35 2007/04/04 16:26:38 acolla
+1. Re-organization of function calls in TestPreprocessor to make it more meaningful.
+2. Added missing dependency in test preprocessors.
+3. in AliShuttle.cxx: processing time and memory consumption info on a single line.
+
+Revision 1.34 2007/04/04 10:33:36 jgrosseo
+1) Storing of files to the Grid is now done _after_ your preprocessors succeeded. This is transparent, which means that you can still use the same functions (Store, StoreReferenceData) to store files to the Grid. However, the Shuttle first stores them locally and transfers them after the preprocessor finished. The return code of these two functions has changed from UInt_t to Bool_t which gives you the success of the storing.
+In case of an error with the Grid, the Shuttle will retry the storing later, the preprocessor does not need to be run again.
+
+2) The meaning of the return code of the preprocessor has changed. 0 is now success and any other value means failure. This value is stored in the log and you can use it to keep details about the error condition.
+
+3) New function StoreReferenceFile to _directly_ store a file (without opening it) to the reference storage.
+
+4) The memory usage of the preprocessor is monitored. If it exceeds 2 GB it is terminated.
+
+5) New function AliPreprocessor::ProcessDCS(). If you do not need to have DCS data in all cases, you can skip the processing by implementing this function and returning kFALSE under certain conditions. E.g. if there is a certain run type.
+If you always need DCS data (like before), you do not need to implement it.
+
+6) The run type has been added to the monitoring page
+
+Revision 1.33 2007/04/03 13:56:01 acolla
+Grid Storage at the end of preprocessing. Added virtual method to disable DCS query according to the
+run type.
+
+Revision 1.32 2007/02/28 10:41:56 acolla
+Run type field added in SHUTTLE framework. Run type is read from "run type" logbook and retrieved by
+AliPreprocessor::GetRunType() function.
+Added some ldap definition files.
+
+Revision 1.30 2007/02/13 11:23:21 acolla
+Moved getters and setters of Shuttle's main OCDB/Reference, local
+OCDB/Reference, temp and log folders to AliShuttleInterface
+
+Revision 1.27 2007/01/30 17:52:42 jgrosseo
+adding monalisa monitoring
+
+Revision 1.26 2007/01/23 19:20:03 acolla
+Removed old ldif files, added TOF, MCH ldif files. Added some options in
+AliShuttleConfig::Print. Added in Ali Shuttle: SetShuttleTempDir and
+SetShuttleLogDir
+
+Revision 1.25 2007/01/15 19:13:52 acolla
+Moved some AliInfo to AliDebug in SendMail function
+
+Revision 1.21 2006/12/07 08:51:26 jgrosseo
+update (alberto):
+table, db names in ldap configuration
+added GRP preprocessor
+DCS data can also be retrieved by data point
+
+Revision 1.20 2006/11/16 16:16:48 jgrosseo
+introducing strict run ordering flag
+removed giving preprocessor name to preprocessor, they have to know their name themselves ;-)
+
+Revision 1.19 2006/11/06 14:23:04 jgrosseo
+major update (Alberto)
+o) reading of run parameters from the logbook
+o) online offline naming conversion
+o) standalone DCSclient package
+
+Revision 1.18 2006/10/20 15:22:59 jgrosseo
+o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
+o) Merging Collect, CollectAll, CollectNew function
+o) Removing implementation of empty copy constructors (declaration still there!)
+
+Revision 1.17 2006/10/05 16:20:55 jgrosseo
+adapting to new CDB classes
+
+Revision 1.16 2006/10/05 15:46:26 jgrosseo
+applying to the new interface
+
+Revision 1.15 2006/10/02 16:38:39 jgrosseo
+update (alberto):
+fixed memory leaks
+storing of objects that failed to be stored to the grid before
+interfacing of shuttle status table in daq system
+
+Revision 1.14 2006/08/29 09:16:05 jgrosseo
+small update
+
+Revision 1.13 2006/08/15 10:50:00 jgrosseo
+effc++ corrections (alberto)
+
+Revision 1.12 2006/08/08 14:19:29 jgrosseo
+Update to shuttle classes (Alberto)
+
+- Possibility to set the full object's path in the Preprocessor's and
+Shuttle's Store functions
+- Possibility to extend the object's run validity in the same classes
+("startValidity" and "validityInfinite" parameters)
+- Implementation of the StoreReferenceData function to store reference
+data in a dedicated CDB storage.
+
+Revision 1.11 2006/07/21 07:37:20 jgrosseo
+last run is stored after each run
+
+Revision 1.10 2006/07/20 09:54:40 jgrosseo
+introducing status management: The processing per subdetector is divided into several steps,
+after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
+can keep track of the number of failures and skips further processing after a certain threshold is
+exceeded. These thresholds can be configured in LDAP.
+
+Revision 1.9 2006/07/19 10:09:55 jgrosseo
+new configuration, access to DAQ FES (Alberto)
+
Revision 1.8 2006/07/11 12:44:36 jgrosseo
adding parameters for extended validity range of data produced by preprocessor
#include "AliCDBManager.h"
#include "AliCDBStorage.h"
#include "AliCDBId.h"
+#include "AliCDBRunRange.h"
+#include "AliCDBPath.h"
+#include "AliCDBEntry.h"
#include "AliShuttleConfig.h"
-#include "AliDCSClient.h"
+#include "DCSClient/AliDCSClient.h"
#include "AliLog.h"
#include "AliPreprocessor.h"
-#include "AliDefaultPreprocessor.h"
+#include "AliShuttleStatus.h"
+#include "AliShuttleLogbookEntry.h"
#include <TSystem.h>
#include <TObject.h>
#include <TSQLServer.h>
#include <TSQLResult.h>
#include <TSQLRow.h>
+#include <TMutex.h>
+#include <TSystemDirectory.h>
+#include <TSystemFile.h>
+#include <TFileMerger.h>
+#include <TGrid.h>
+#include <TGridResult.h>
-ClassImp(AliShuttle)
+#include <TMonaLisaWriter.h>
-TString AliShuttle::fgkLocalUri("local://$ALICE_ROOT/SHUTTLE/ShuttleCDB");
-const char* AliShuttle::fgkShuttleTempDir = "$ALICE_ROOT/SHUTTLE/temp";
+#include <fstream>
-const char* AliShuttle::fgkDetectorName[AliShuttle::fgkNDetectors] = {"SPD", "SDD", "SSD", "TPC", "TRD", "TOF",
- "PHOS", "CPV", "RICH", "EMCAL", "MUON_TRK", "MUON_TRG", "FMD", "ZDC", "PMD", "START", "VZERO"};
+#include <sys/types.h>
+#include <sys/wait.h>
-const char* AliShuttle::fgkDetectorCode[AliShuttle::fgkNDetectors] = {"SPD", "SDD", "SSD", "TPC", "TRD", "TOF",
- "PHS", "CPV", "HMP", "EMC", "MCH", "MTR", "FMD", "ZDC", "PMD", "T00", "V00"};
+ClassImp(AliShuttle)
//______________________________________________________________________________________________
AliShuttle::AliShuttle(const AliShuttleConfig* config,
UInt_t timeout, Int_t retries):
- fConfig(config),
- fTimeout(timeout),
- fRetries(retries), fCurrentRun(-1), fCurrentStartTime(0),
- fCurrentEndTime(0),
- fLog("")
+fConfig(config),
+fTimeout(timeout), fRetries(retries),
+fPreprocessorMap(),
+fLogbookEntry(0),
+fCurrentDetector(),
+fStatusEntry(0),
+fMonitoringMutex(0),
+fLastActionTime(0),
+fLastAction(),
+fMonaLisa(0),
+fTestMode(kNone),
+fReadTestMode(kFALSE),
+fOutputRedirected(kFALSE)
{
//
// config: AliShuttleConfig used
//
if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
- for(int iSys=0;iSys<3;iSys++) {
+ for(int iSys=0;iSys<4;iSys++) {
fServer[iSys]=0;
- fFESlist[iSys].SetOwner(kTRUE);
+ if (iSys < 3)
+ fFXSlist[iSys].SetOwner(kTRUE);
}
-}
+ fPreprocessorMap.SetOwner(kTRUE);
-//______________________________________________________________________
-AliShuttle::AliShuttle(const AliShuttle& /*other*/):
-AliShuttleInterface()
-{
-// copy constructor (not implemented)
-
-}
-
-//______________________________________________________________________
-AliShuttle &AliShuttle::operator=(const AliShuttle& /*other*/)
-{
-// assignment operator (not implemented)
+ for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
+ fFirstUnprocessed[iDet] = kFALSE;
-return *this;
+ fMonitoringMutex = new TMutex();
}
//______________________________________________________________________________________________
AliShuttle::~AliShuttle()
{
-// destructor
+ //
+ // destructor
+ //
fPreprocessorMap.DeleteAll();
- for(int iSys=0;iSys<3;iSys++)
+ for(int iSys=0;iSys<4;iSys++)
if(fServer[iSys]) {
fServer[iSys]->Close();
delete fServer[iSys];
+ fServer[iSys] = 0;
}
+
+ if (fStatusEntry){
+ delete fStatusEntry;
+ fStatusEntry = 0;
+ }
+
+ if (fMonitoringMutex)
+ {
+ delete fMonitoringMutex;
+ fMonitoringMutex = 0;
+ }
}
//______________________________________________________________________________________________
// with the same identifier (GetName()).
//
- if (fPreprocessorMap.GetValue(preprocessor->GetName())) {
- AliWarning(Form("AliPreprocessor %s is already registered!",
- preprocessor->GetName()));
+ const char* detName = preprocessor->GetName();
+ if(GetDetPos(detName) < 0)
+ AliFatal(Form("********** !!!!! Invalid detector name: %s !!!!! **********", detName));
+
+ if (fPreprocessorMap.GetValue(detName)) {
+ AliWarning(Form("AliPreprocessor %s is already registered!", detName));
return;
}
- fPreprocessorMap.Add(new TObjString(preprocessor->GetName()), preprocessor);
+ fPreprocessorMap.Add(new TObjString(detName), preprocessor);
+}
+//______________________________________________________________________________________________
+Bool_t AliShuttle::Store(const AliCDBPath& path, TObject* object,
+ AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite)
+{
+ // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for
+ // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored
+ // using this function. Use StoreReferenceData instead!
+ // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
+ // finishes the data are transferred to the main storage (Grid).
+
+ return StoreLocally(fgkLocalCDB, path, object, metaData, validityStart, validityInfinite);
}
//______________________________________________________________________________________________
-UInt_t AliShuttle::Store(const char* detector,
- TObject* object, AliCDBMetaData* metaData, Int_t /*validityStart*/, Bool_t /*validityInfinite*/)
+Bool_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData)
{
- // store data into CDB
- //
- // validityStart is the start validity of the data, if not 0 GetCurrentRun() - validityStart is taken
- // validityInfinite defines if the data is valid until new data arrives (e.g. for calibration runs)
- //
- // returns 0 if fail
- // 1 if stored in main (Grid) storage
- // 2 if stored in backup (Local) storage
+ // Stores a CDB object in the storage for reference data. These objects will not be available during
+ // offline reconstruction. Use this function for reference data only!
+ // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
+ // finishes the data are transferred to the main storage (Grid).
- // TODO implement use of two parameters
+ return StoreLocally(fgkLocalRefStorage, path, object, metaData);
+}
- // TODO shouldn't the path be given by the preprocessor???
- AliCDBId id(AliCDBPath(detector, "DCS", "Data"),
- GetCurrentRun(), GetCurrentRun());
+//______________________________________________________________________________________________
+Bool_t AliShuttle::StoreLocally(const TString& localUri,
+ const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
+ Int_t validityStart, Bool_t validityInfinite)
+{
+ // Store object temporarily in local storage. Parameters are passed by Store and StoreReferenceData functions.
+ // when the preprocessor finishes the data are transferred to the main storage (Grid).
+ // The parameters are:
+ // 1) Uri of the backup storage (Local)
+ // 2) the object's path.
+ // 3) the object to be stored
+ // 4) the metaData to be associated with the object
+ // 5) the validity start run number w.r.t. the current run,
+ // if the data is valid only for this run leave the default 0
+ // 6) specifies if the calibration data is valid for infinity (this means until updated),
+ // typical for calibration runs, the default is kFALSE
+ //
+ // returns kFALSE if storing fails, kTRUE otherwise
- UInt_t result = 0;
- if (!(AliCDBManager::Instance()->IsDefaultStorageSet())) {
- Log(detector, "No CDB storage set!");
+ if (fTestMode & kErrorStorage)
+ {
+ Log(fCurrentDetector, "StoreLocally - In TESTMODE - Simulating error while storing locally");
+ return kFALSE;
+ }
+
+ const char* cdbType = (localUri == fgkLocalCDB) ? "CDB" : "Reference";
+
+ Int_t firstRun = GetCurrentRun() - validityStart;
+ if(firstRun < 0) {
+ AliWarning("First valid run happens to be less than 0! Setting it to 0.");
+ firstRun=0;
+ }
+
+ Int_t lastRun = -1;
+ if(validityInfinite) {
+ lastRun = AliCDBRunRange::Infinity();
} else {
- result = (UInt_t) AliCDBManager::Instance()->Put(object, id, metaData);
+ lastRun = GetCurrentRun();
}
- if(!result) {
- Log(detector, "Error while storing object in main storage!");
- AliError("local storage will be used!");
+ // Version is set to current run, it will be used later to transfer data to Grid
+ AliCDBId id(path, firstRun, lastRun, GetCurrentRun(), -1);
- AliCDBStorage *origStorage = AliCDBManager::Instance()->GetDefaultStorage();
+ if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){
+ TObjString runUsed = Form("%d", GetCurrentRun());
+ metaData->SetProperty("RunUsed(TObjString)", runUsed.Clone());
+ }
+
+ Bool_t result = kFALSE;
- result = AliCDBManager::Instance()->GetStorage(fgkLocalUri)
+ if (!(AliCDBManager::Instance()->GetStorage(localUri))) {
+ Log("SHUTTLE", Form("StoreLocally - Cannot activate local %s storage", cdbType));
+ } else {
+ result = AliCDBManager::Instance()->GetStorage(localUri)
->Put(object, id, metaData);
+ }
- AliCDBManager::Instance()->SetDefaultStorage(origStorage);
+ if(!result) {
- if(result) {
- result = 2;
- }else{
- Log(detector, "Can't store data!");
- }
+ Log(fCurrentDetector, Form("StoreLocally - Can't store object <%s>!", id.ToString().Data()));
}
+
return result;
+}
+//______________________________________________________________________________________________
+Bool_t AliShuttle::StoreOCDB()
+{
+ //
+ // Called when preprocessor ends successfully or when previous storage attempt failed (kStoreError status).
+ // Calls the underlying StoreOCDB(const TString&) function for the reference storage, then
+ // StoreRefFilesToGrid to store the reference files, and finally StoreOCDB(const TString&) for the OCDB storage.
+ //
+
+ if (fTestMode & kErrorGrid)
+ {
+ Log("SHUTTLE", "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
+ Log(fCurrentDetector, "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
+ return kFALSE;
+ }
+
+ AliInfo("Storing reference data ...");
+ Bool_t resultRef = StoreOCDB(fgkMainRefStorage);
+
+ AliInfo("Storing reference files ...");
+ Bool_t resultRefFiles = StoreRefFilesToGrid();
+
+ AliInfo("Storing OCDB data ...");
+ Bool_t resultCDB = StoreOCDB(fgkMainCDB);
+
+ return resultCDB && resultRef && resultRefFiles;
}
//______________________________________________________________________________________________
-Bool_t AliShuttle::Process(Int_t run, UInt_t startTime, UInt_t endTime)
+Bool_t AliShuttle::StoreOCDB(const TString& gridURI)
{
//
- // Makes data retrieval for all detectors in the configuration.
- // run: is the run number used
- // startTime: is the run start time
- // endTime: is the run end time
- // Returns kFALSE in case of error occured and kTRUE otherwise
+ // Called by StoreOCDB(), performs actual storage to the main OCDB and reference storages (Grid)
//
- AliInfo(Form("\n\n ^*^*^*^*^*^* Processing run %d ^*^*^*^*^*^*", run));
+ TObjArray* gridIds=0;
+
+ Bool_t result = kTRUE;
+ // to check whether all files have been transferred, or some files were left behind
+ // because the run is not first unprocessed
+ Bool_t willDoAgain = kFALSE;
+
+ const char* type = 0;
+ TString localURI;
+ if(gridURI == fgkMainCDB) {
+ type = "OCDB";
+ localURI = fgkLocalCDB;
+ } else if(gridURI == fgkMainRefStorage) {
+ type = "reference";
+ localURI = fgkLocalRefStorage;
+ } else {
+ AliError(Form("Invalid storage URI: %s", gridURI.Data()));
+ return kFALSE;
+ }
- // Initialization
- ClearLog();
- Bool_t hasError = kFALSE;
- for(Int_t iSys=0;iSys<3;iSys++) fFESCalled[iSys]=kFALSE;
- fCurrentRun = run;
- fCurrentStartTime = startTime;
- fCurrentEndTime = endTime;
+ AliCDBManager* man = AliCDBManager::Instance();
- // Loop on detectors in the configuration
- TIter iter(fConfig->GetDetectors());
- TObjString* aDetector;
+ AliCDBStorage *gridSto = man->GetStorage(gridURI);
+ if(!gridSto) {
+ Log("SHUTTLE",
+ Form("StoreOCDB - cannot activate main %s storage", type));
+ return kFALSE;
+ }
- while ((aDetector = (TObjString*) iter.Next())) {
- Bool_t detectorError=kFALSE;
- if(!fConfig->HostProcessDetector(aDetector->GetName())) continue;
- if(!Process(run, startTime, endTime, aDetector->String())) {
- hasError = kTRUE;
- detectorError=kTRUE;
+ gridIds = gridSto->GetQueryCDBList();
+
+ // get objects previously stored in local CDB
+ AliCDBStorage *localSto = man->GetStorage(localURI);
+ if(!localSto) {
+ Log("SHUTTLE",
+ Form("StoreOCDB - cannot activate local %s storage", type));
+ return kFALSE;
+ }
+ AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*");
+ // Local objects were stored with current run as Grid version!
+ TList* localEntries = localSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun());
+ localEntries->SetOwner(1);
+
+ // loop on local stored objects
+ TIter localIter(localEntries);
+ AliCDBEntry *aLocEntry = 0;
+ while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){
+ aLocEntry->SetOwner(1);
+ AliCDBId aLocId = aLocEntry->GetId();
+ aLocEntry->SetVersion(-1);
+ aLocEntry->SetSubVersion(-1);
+
+ // If local object is valid up to infinity we store it only if it is
+ // the first unprocessed run!
+ if (aLocId.GetLastRun() == AliCDBRunRange::Infinity() &&
+ !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
+ {
+ Log("SHUTTLE", Form("StoreOCDB - %s: object %s has validity infinite but "
+ "there are previous unprocessed runs!",
+ fCurrentDetector.Data(), aLocId.GetPath().Data()));
+ willDoAgain=kTRUE;
continue;
}
- AliInfo(Form("Process ended successfully for detector %s!",aDetector->GetName()));
- // Process successful: Update time_processed field in FES logbooks!
- if(fFESCalled[kDAQ]) {
- hasError = (UpdateDAQTable(aDetector->GetName()) == kFALSE);
- fFESlist[kDAQ].Clear();
+ // loop on Grid valid Id's
+ Bool_t store = kTRUE;
+ TIter gridIter(gridIds);
+ AliCDBId* aGridId = 0;
+ while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
+ if(aGridId->GetPath() != aLocId.GetPath()) continue;
+ // skip all objects valid up to infinity
+ if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
+ // if we get here, it means there's already some more recent object stored on Grid!
+ store = kFALSE;
+ break;
}
- //if(fFESCalled[kDCS]) {
- // hasError = UpdateDCSTable(aDetector->GetName());
- // fFESlist[kDCS].Clear();
- //}
- //if(fFESCalled[kHLT]) {
- // hasError = UpdateHLTTable(aDetector->GetName());
- // fFESlist[kHLT].Clear();
- //}
- }
- if(fLog != "") StoreLog(run);
- fCurrentRun = -1;
- fCurrentStartTime = 0;
- fCurrentEndTime = 0;
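+ // Try to put the object into the Grid storage. NOTE(review): Put() is called even when 'store' is kFALSE (a more recent object already exists on the Grid) - confirm this is intended.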
+ Bool_t storeOk = gridSto->Put(aLocEntry);
+ if(!store || storeOk){
+
+ if (!store)
+ {
+ Log(fCurrentDetector.Data(),
+ Form("StoreOCDB - A more recent object already exists in %s storage: <%s>",
+ type, aGridId->ToString().Data()));
+ } else {
+ Log("SHUTTLE",
+ Form("StoreOCDB - Object <%s> successfully put into %s storage",
+ aLocId.ToString().Data(), type));
+ }
+
+ // removing local filename...
+ TString filename;
+ localSto->IdToFilename(aLocId, filename);
+ AliInfo(Form("Removing local file %s", filename.Data()));
+ RemoveFile(filename.Data());
+ continue;
+ } else {
+ Log("SHUTTLE",
+ Form("StoreOCDB - Grid %s storage of object <%s> failed",
+ type, aLocId.ToString().Data()));
+ result = kFALSE;
+ }
+ }
+ localEntries->Clear();
+
+ if(result && willDoAgain) {
+ Log(fCurrentDetector.Data(),
+ "Some files have been left on local storage, will try again later!");
+ result = kFALSE;
+ }
- return hasError == kFALSE;
+ return result;
}
//______________________________________________________________________________________________
-Bool_t AliShuttle::Process(Int_t run, UInt_t startTime, UInt_t endTime,
- const char* detector)
+Bool_t AliShuttle::StoreReferenceFile(const char* detector, const char* localFile, const char* gridFileName)
{
//
- // Makes data retrieval just for one specific detector.
- // Threre should be a configuration for this detector.
- // run: is the run number used
- // startTime: is the run start time
- // endTime: is the run end time
- // detector: detector for which the retrieval will be made
- // Returns kFALSE in case of error occured and kTRUE otherwise
+ // Stores reference file directly (without opening it). This function stores the file locally.
//
-
- AliInfo(Form("Retrieving values for %s, run %d", detector, run));
-
- if (!fConfig->HasDetector(detector)) {
- Log(detector, "There isn't any configuration for %s !");
+ // The file is stored under the following location:
+ // <base folder of local reference storage>/<DET>/<RUN#>_<gridFileName>
+ // where <gridFileName> is the third parameter given to the function
+ //
+
+ if (fTestMode & kErrorStorage)
+ {
+ Log(fCurrentDetector, "StoreReferenceFile - In TESTMODE - Simulating error while storing locally");
return kFALSE;
}
-
- TString host(fConfig->GetDCSHost(detector));
- Int_t port = fConfig->GetDCSPort(detector);
-
- TIter iter(fConfig->GetDCSAliases(detector));
- TObjString* anAlias;
- TMap aliasMap;
-
- Bool_t hasError = kFALSE;
- Bool_t result=kFALSE;
-
- while ((anAlias = (TObjString*) iter.Next())) {
- TObjArray valueSet;
- result = GetValueSet(host, port, anAlias->String(), valueSet);
- //AliInfo(Form("Port = %d",port));
- //result = kTRUE;
- if(result) {
- aliasMap.Add(anAlias->Clone(), valueSet.Clone());
- }else{
- TString message = Form("Error while retrieving alias %s !",
- anAlias->GetName());
- Log(detector, message.Data());
- hasError = kTRUE;
+
+ AliCDBManager* man = AliCDBManager::Instance();
+ AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
+
+ TString localBaseFolder = sto->GetBaseFolder();
+
+ TString targetDir;
+ targetDir.Form("%s/%s", localBaseFolder.Data(), detector);
+
+ TString target;
+ target.Form("%s/%d_%s", targetDir.Data(), GetCurrentRun(), gridFileName);
+
+ Int_t result = gSystem->GetPathInfo(targetDir, 0, (Long64_t*) 0, 0, 0);
+ if (result)
+ {
+ result = gSystem->mkdir(targetDir, kTRUE);
+ if (result != 0)
+ {
+ Log("SHUTTLE", Form("StoreReferenceFile - Error creating base directory %s", targetDir.Data()));
+ return kFALSE;
}
}
+
+ result = gSystem->CopyFile(localFile, target);
- // even if hasError is TRUE the Shuttle should keep on processing the detector (calib files!)
-
- if(hasError) return kFALSE;
- // TODO if(hasError) mark DCS error
-
- AliPreprocessor* aPreprocessor =
- dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(detector));
- if(aPreprocessor)
+ if (result == 0)
{
- aPreprocessor->Initialize(run, startTime, endTime);
- hasError = (aPreprocessor->Process(&aliasMap) == 0);
- }else{
- // TODO default behaviour?
- AliInfo(Form("No Preprocessor for %s: storing TMap of DP arrays into CDB!",detector));
- AliCDBMetaData metaData;
- AliDCSValue dcsValue(startTime, endTime);
- metaData.SetResponsible(Form("Duck, Donald"));
- metaData.SetProperty("StartEndTime", &dcsValue);
- metaData.SetComment("Automatically stored by Shuttle!");
- hasError = (Store(detector, &aliasMap, &metaData) == 0);
+ Log("SHUTTLE", Form("StoreReferenceFile - Stored file %s locally to %s", localFile, target.Data()));
+ return kTRUE;
}
-
-
- aliasMap.Delete();
-
- return hasError == kFALSE;
+ else
+ {
+ Log("SHUTTLE", Form("StoreReferenceFile - Storing file %s locally to %s failed", localFile, target.Data()));
+ return kFALSE;
+ }
}
//______________________________________________________________________________________________
-Bool_t AliShuttle::GetValueSet(const char* host, Int_t port, const char* alias,
- TObjArray& valueSet)
+Bool_t AliShuttle::StoreRefFilesToGrid()
{
-// Retrieve all "alias" data points from the DCS server
-// host, port: TSocket connection parameters
-// alias: name of the alias
-// valueSet: array of retrieved AliDCSValue's
-
- AliDCSClient client(host, port, fTimeout, fRetries);
- if (!client.IsConnected()) {
+ //
+ // Transfers the reference files of the current run and detector to the Grid.
+ //
+ // Each file is copied to the following location:
+ // <base folder of reference storage>/<DET>/<RUN#>_<gridFileName>
+ // i.e. it keeps the <RUN#>_<gridFileName> name it has in the local reference storage.
+ //
+
+ AliCDBManager* man = AliCDBManager::Instance();
+ AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
+ if (!sto)
+ return kFALSE;
+ TString localBaseFolder = sto->GetBaseFolder();
+
+ TString dir;
+ dir.Form("%s/%s", localBaseFolder.Data(), GetOfflineDetName(fCurrentDetector));
+
+ AliCDBStorage* gridSto = man->GetStorage(fgkMainRefStorage);
+ if (!gridSto)
return kFALSE;
+ TString gridBaseFolder = gridSto->GetBaseFolder();
+ TString alienDir;
+ alienDir.Form("%s%s", gridBaseFolder.Data(), GetOfflineDetName(fCurrentDetector));
+
+ if (!gGrid)
+ return kFALSE;
+
+ TString begin;
+ begin.Form("%d_", GetCurrentRun());
+
+ TSystemDirectory* baseDir = new TSystemDirectory("/", dir);
+ if (!baseDir)
+ return kTRUE;
+
+ TList* dirList = baseDir->GetListOfFiles();
+ if (!dirList)
+ {
+ delete baseDir;
+ return kTRUE;
}
-
- Int_t result = client.GetAliasValues(alias,
- GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
-
- if (result < 0) {
- AliError(Form("Can't get '%s'! Reason: %s",
- alias, AliDCSClient::GetErrorString(result)));
-
- if (result == AliDCSClient::fgkServerError) {
- AliError(Form("Server error: %s",
- client.GetServerError().Data()));
+
+ Int_t nDirs = dirList->GetEntries();
+
+ Bool_t success = kTRUE;
+ Bool_t first = kTRUE;
+
+ for (Int_t iDir=0; iDir<nDirs; ++iDir)
+ {
+ TSystemFile* entry = dynamic_cast<TSystemFile*> (dirList->At(iDir));
+ if (!entry)
+ continue;
+
+ if (entry->IsDirectory())
+ continue;
+
+ TString fileName(entry->GetName());
+ if (!fileName.BeginsWith(begin))
+ continue;
+
+ if (first)
+ {
+ first = kFALSE;
+ // check that DET folder exists, otherwise create it
+ TGridResult* result = gGrid->Ls(alienDir.Data(), "a");
+
+ if (!result)
+ return kFALSE;
+
+ if (!result->GetFileName(0))
+ {
+ if (!gGrid->Mkdir(alienDir.Data(),"",0))
+ {
+ Log("SHUTTLE", Form("StoreRefFilesToGrid - Cannot create directory %s",
+ alienDir.Data()));
+ delete baseDir;
+ return kFALSE;
+ }
+
+ }
+ }
+
+ TString fullLocalPath;
+ fullLocalPath.Form("%s/%s", dir.Data(), fileName.Data());
+
+ TString fullGridPath;
+ fullGridPath.Form("alien://%s/%s", alienDir.Data(), fileName.Data());
+
+ Log("SHUTTLE", Form("StoreRefFilesToGrid - Copying local file %s to %s", fullLocalPath.Data(), fullGridPath.Data()));
+
+ TFileMerger fileMerger;
+ Bool_t result = fileMerger.Cp(fullLocalPath, fullGridPath);
+
+ if (result)
+ {
+ Log("SHUTTLE", Form("StoreRefFilesToGrid - Copying local file %s to %s succeeded", fullLocalPath.Data(), fullGridPath.Data()));
+ RemoveFile(fullLocalPath);
+ }
+ else
+ {
+ Log("SHUTTLE", Form("StoreRefFilesToGrid - Copying local file %s to %s failed", fullLocalPath.Data(), fullGridPath.Data()));
+ success = kFALSE;
}
-
- return kFALSE;
}
-
- return kTRUE;
+
+ delete baseDir;
+
+ return success;
}
//______________________________________________________________________________________________
-const char* AliShuttle::GetFile(Int_t system, const char* detector,
- const char* id, const char* source)
+void AliShuttle::CleanLocalStorage(const TString& uri)
{
-// Get calibration file from file exchange servers
-// calls specific getter according to system index (kDAQ, kDCS, kHLT)
+ //
+ // Called in case the preprocessor is declared failed. Removes remaining objects from the given local storage.
+ //
- switch(system){
- case kDAQ:
- return GetDAQFileName(detector, id, source);
- break;
- case kDCS:
- return GetDCSFileName(detector, id, source);
- break;
- case kHLT:
- return GetHLTFileName(detector, id, source);
- break;
- default:
- AliError(Form("No valid system index: %d",system));
+ const char* type = 0;
+ if(uri == fgkLocalCDB) {
+ type = "OCDB";
+ } else if(uri == fgkLocalRefStorage) {
+ type = "reference";
+ } else {
+ AliError(Form("Invalid storage URI: %s", uri.Data()));
+ return;
}
- return 0;
-}
-
-//______________________________________________________________________________________________
-TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
-{
-// Get sources producing the condition file Id from file exchange servers
-// calls specific getter according to system index (kDAQ, kDCS, kHLT)
+ AliCDBManager* man = AliCDBManager::Instance();
- switch(system){
- case kDAQ:
- return GetDAQFileSources(detector, id);
- break;
- case kDCS:
- return GetDCSFileSources(detector, id);
- break;
- case kHLT:
- return GetHLTFileSources(detector, id);
- break;
- default:
- AliError(Form("No valid system index: %d",system));
+ // open local storage
+ AliCDBStorage *localSto = man->GetStorage(uri);
+ if(!localSto) {
+ Log("SHUTTLE",
+ Form("CleanLocalStorage - cannot activate local %s storage", type));
+ return;
}
- return NULL;
-}
+ TString filename(Form("%s/%s/*/Run*_v%d_s*.root",
+ localSto->GetBaseFolder().Data(), fCurrentDetector.Data(), GetCurrentRun()));
-//______________________________________________________________________________________________
-Bool_t AliShuttle::Connect(Int_t system){
-// Connect to MySQL Server of the system's FES logbook
+ AliInfo(Form("filename = %s", filename.Data()));
- // check connection: if already connected return
- if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
+ AliInfo(Form("Removing remaining local files from run %d and detector %s ...",
+ GetCurrentRun(), fCurrentDetector.Data()));
- TString aFESlbHost= Form("mysql://%s", fConfig->GetFESlbHost(system));
+ RemoveFile(filename.Data());
- fServer[system] = TSQLServer::Connect(aFESlbHost,
- fConfig->GetFESlbUser(system),
- fConfig->GetFESlbPass(system));
- if (!fServer[system] || !fServer[system]->IsConnected()) {
- AliError(Form("Can't establish connection to FES logbook for %s !",fkSystemNames[system]));
- return kFALSE;
- }
+}
- // Get tables
- // TODO in the configuration should the table name be there too?
- switch(system){
- case kDAQ:
- fServer[kDAQ]->GetTables("REFSYSLOG");
- break;
- case kDCS:
- //fServer[kDCS]->GetTables("REFSYSLOG");
- break;
- case kHLT:
- //fServer[kHLT]->GetTables("REFSYSLOG");
- break;
- default:
- break;
- }
+//______________________________________________________________________________________________
+void AliShuttle::RemoveFile(const char* filename)
+{
+ //
+ // removes local file
+ //
- return kTRUE;
+ TString command(Form("rm -f %s", filename));
+
+ Int_t result = gSystem->Exec(command.Data());
+ if(result != 0)
+ {
+ Log("SHUTTLE", Form("RemoveFile - %s: Cannot remove file %s!",
+ fCurrentDetector.Data(), filename));
+ }
}
//______________________________________________________________________________________________
-const char* AliShuttle::GetDAQFileName(const char* detector, const char* id, const char* source){
-// Retrieves a file from the DAQ FES.
-// First queris the DAQ logbook_fs for the DAQ file name, using the run, detector, id and source info
-// then calls RetrieveDAQFile(DAQfilename) for actual copy to local disk
-// run: current run being processed (fCurrentRun)
-// detector: comes from the Preprocessor name (must be converted into detector code with GetDetCode)
-// id: provided as a parameter by the Preprocessor
-// source: provided by the Preprocessor through GetFileSources function
+AliShuttleStatus* AliShuttle::ReadShuttleStatus()
+{
+ //
+ // Reads the AliShuttleStatus of the current detector (fCurrentDetector) and the
+ // current run from the CDB storage given by GetLocalCDB().
+ // The retrieved entry is kept in fStatusEntry; a previously held entry is deleted first.
+ // Returns the status object contained in the entry, or 0 if no entry was found or
+ // if the stored object is not an AliShuttleStatus.
+ //
- // check connection, in case connect
- if(!Connect(kDAQ)){
- Log(detector, "GetDAQFileName: Couldn't connect to DAQ Logbook !");
- return 0;
+ // drop the entry from a previous call before fetching a new one
+ if (fStatusEntry){
+ delete fStatusEntry;
+ fStatusEntry = 0;
}
- // Query preparation
- TString sqlQueryStart = "select filePath from logbook_fs where";
- TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\"",
- fCurrentRun, GetDetCode(detector), id, source);
- TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
+ // NOTE(review): the result of GetStorage() is used without a null check - confirm it cannot be 0 here
+ fStatusEntry = AliCDBManager::Instance()->GetStorage(GetLocalCDB())
+ ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
- AliInfo(Form("SQL query: \n%s",sqlQuery.Data()));
-
- // Query execution
- TSQLResult* aResult;
- aResult = fServer[kDAQ]->Query(sqlQuery);
- if (!aResult) {
- Log(detector, Form("Can't execute query <%s>!", sqlQuery.Data()));
- return 0;
- }
+ if (!fStatusEntry) return 0;
+ // presumably makes the entry responsible for deleting the contained object - TODO confirm
+ fStatusEntry->SetOwner(1);
- if (aResult->GetRowCount() == 0) {
- Log(detector,
- Form("GetDAQFileName: No result from SQL query <%s>!", sqlQuery.Data()));
- delete aResult;
+ AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
+ // the entry stays in fStatusEntry even if its content has the wrong type
+ if (!status) {
+ AliError("Invalid object stored to CDB!");
return 0;
}
- if (aResult->GetRowCount() >1) {
- Log(detector,
- Form("GetDAQFileName: More than one row resulting from SQL query <%s>!", sqlQuery.Data()));
- delete aResult;
- return 0;
- }
+ return status;
+}
- TSQLRow* aRow = aResult->Next();
+//______________________________________________________________________________________________
+Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status)
+{
+ //
+ // Writes the status for one subdetector (fCurrentDetector) and the current run.
+ // status: object that is wrapped into a new AliCDBEntry, which replaces fStatusEntry
+ // and is put into the storage fgkLocalCDB.
+ // Returns kFALSE if Put reports a failure (zero result), kTRUE otherwise.
+ // On success the new state is also sent out through SendMLInfo.
+ //
- if(!aRow){
- Log(detector, Form("GetDAQFileName: Empty set result from query <%s>!", sqlQuery.Data()));
- delete aResult;
- return 0;
+ // drop the entry that is currently held before creating the new one
+ if (fStatusEntry){
+ delete fStatusEntry;
+ fStatusEntry = 0;
}
- TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
+ Int_t run = GetCurrentRun();
- delete aResult;
+ // the id is built with the current run as both run arguments
+ AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
- AliInfo(Form("filePath = %s",filePath.Data()));
+ fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
+ // presumably hands ownership of status to the entry - TODO confirm
+ fStatusEntry->SetOwner(1);
- // retrieved file is renamed to make it unique
- TString localFileName = Form("%s_%d_%s_%s.shuttle",
- detector, fCurrentRun, id, source);
+ // NOTE(review): the result of GetStorage() is used without a null check
+ UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
- // file retrieval from DAQ FES
- Bool_t result = RetrieveDAQFile(filePath.Data(), localFileName.Data());
- if(!result) {
- Log(detector, Form("copying file %s from DAQ FES failed!", filePath.Data()));
- return 0;
- } else {
- AliInfo(Form("File %s copied from DAQ FES into %s/%s !",
- filePath.Data(), fgkShuttleTempDir, localFileName.Data()));
+ if (!result) {
+ Log("SHUTTLE", Form("WriteShuttleStatus - Failed for %s, run %d",
+ fCurrentDetector.Data(), run));
+ return kFALSE;
}
+
+ SendMLInfo();
-
- fFESCalled[kDAQ]=kTRUE;
- TObjString *fileParams = new TObjString(Form("%s_!?!_%s", id, source));
- fFESlist[kDAQ].Add(fileParams);
-
- return localFileName.Data();
-
+ return kTRUE;
}
//______________________________________________________________________________________________
-Bool_t AliShuttle::RetrieveDAQFile(const char* daqFileName, const char* localFileName){
+void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount)
+{
+ //
+ // Changes the AliShuttleStatus held in fStatusEntry to the given status, stores the
+ // entry into the storage fgkLocalCDB and sends the new state through SendMLInfo.
+ // newStatus: status to be set
+ // increaseCount: if true, IncreaseCount() is called on the status object
+ // Does nothing (apart from an error message) if fStatusEntry is empty or does not
+ // contain an AliShuttleStatus.
+ //
- // check temp directory: trying to cd to temp; if it does not exist, create it
- AliInfo(Form("Copy file %s from DAQ FES into folder %s and rename it as %s",
- daqFileName,fgkShuttleTempDir, localFileName));
+ if (!fStatusEntry){
+ AliError("UNEXPECTED: fStatusEntry empty");
+ return;
+ }
- void* dir = gSystem->OpenDirectory(fgkShuttleTempDir);
- if (dir == NULL) {
- if (gSystem->mkdir(fgkShuttleTempDir, kTRUE)) {
- AliError(Form("Can't open directory <%s>!", fgkShuttleTempDir));
- return kFALSE;
- }
+ AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
- } else {
- gSystem->FreeDirectory(dir);
+ if (!status){
+ Log("SHUTTLE", "UNEXPECTED: status could not be read from current CDB entry");
+ return;
}
- TString baseDAQFESFolder = "DAQ";
- TString command = Form("scp %s@%s:%s/%s %s/%s",
- fConfig->GetFESUser(kDAQ),
- fConfig->GetFESHost(kDAQ),
- baseDAQFESFolder.Data(),
- daqFileName,
- fgkShuttleTempDir,
- localFileName);
+ // the message is built before SetStatus, so the first name is the old state
+ TString actionStr = Form("UpdateShuttleStatus - %s: Changing state from %s to %s",
+ fCurrentDetector.Data(),
+ status->GetStatusName(),
+ status->GetStatusName(newStatus));
+ Log("SHUTTLE", actionStr);
+ SetLastAction(actionStr);
- AliInfo(Form("%s",command.Data()));
+ status->SetStatus(newStatus);
+ if (increaseCount) status->IncreaseCount();
- UInt_t nRetries = 0;
- UInt_t maxRetries = 3;
+ // NOTE(review): the result of Put is ignored here, WriteShuttleStatus checks it
+ AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
- // copy!! if successful TSystem::Exec returns 0
- while(nRetries++ < maxRetries) {
- AliInfo(Form("Trying to copy file. Retry # %d", nRetries));
- if(gSystem->Exec(command.Data()) == 0) return kTRUE;
+ SendMLInfo();
+}
+
+//______________________________________________________________________________________________
+void AliShuttle::SendMLInfo()
+{
+	//
+	// Sends ML information about the current status of the current detector being processed:
+	// the parameters "<detector>_status" (status name) and "<detector>_count" (retry count)
+	// are sent through fMonaLisa.
+	// Nothing is sent if there is no status entry, no MonaLisa writer or if the entry
+	// does not contain an AliShuttleStatus.
+	//
+
+	// Process() resets fMonaLisa to 0 when it finishes: do not dereference null pointers
+	if (!fStatusEntry || !fMonaLisa){
+		Log("SHUTTLE", "SendMLInfo - UNEXPECTED: status entry or MonaLisa writer not available");
+		return;
+	}
+
+	AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
+
+	if (!status){
+		Log("SHUTTLE", "SendMLInfo - UNEXPECTED: status could not be read from current CDB entry");
+		return;
}
+
+	TMonaLisaText mlStatus(Form("%s_status", fCurrentDetector.Data()), status->GetStatusName());
+	TMonaLisaValue mlRetryCount(Form("%s_count", fCurrentDetector.Data()), status->GetCount());
- return kFALSE;
+	// the list only references the two stack objects above
+	TList mlList;
+	mlList.Add(&mlStatus);
+	mlList.Add(&mlRetryCount);
+	fMonaLisa->SendParameters(&mlList);
}
//______________________________________________________________________________________________
-TList* AliShuttle::GetDAQFileSources(const char* detector, const char* id){
-// Retrieves a file from the DCS FES.
+Bool_t AliShuttle::ContinueProcessing()
+{
+	// This function reads the AliShuttleStatus information of the current detector
+	// (fCurrentDetector) through ReadShuttleStatus and checks if the processing
+	// should be continued.
+	// If yes it returns kTRUE; the status has then been written (first time) or
+	// updated (restart) to kStarted. In all other cases it returns kFALSE.
- // check connection, in case connect
- if(!Connect(kDAQ)){
- Log(detector, "GetDAQFileName: Couldn't connect to DAQ Logbook !");
+	if (!fConfig->HostProcessDetector(fCurrentDetector)) return kFALSE;
+
+	AliPreprocessor* aPreprocessor =
+		dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
+	if (!aPreprocessor)
+	{
+		AliInfo(Form("%s: no preprocessor registered", fCurrentDetector.Data()));
+		return kFALSE;
+	}
+
+	AliShuttleLogbookEntry::Status entryStatus =
+		fLogbookEntry->GetDetectorStatus(fCurrentDetector);
+
+	if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
+		AliInfo(Form("ContinueProcessing - %s is %s",
+				fCurrentDetector.Data(),
+				fLogbookEntry->GetDetectorStatusName(entryStatus)));
+		return kFALSE;
+	}
+
+	// if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state
+
+	// check if current run is first unprocessed run for current detector
+	if (fConfig->StrictRunOrder(fCurrentDetector) &&
+		!fFirstUnprocessed[GetDetPos(fCurrentDetector)])
+	{
+		// the detector name has to be passed for the %s of the format string
+		Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering but this is not the first unprocessed run!",
+				fCurrentDetector.Data()));
+		return kFALSE;
+	}
+
+	AliShuttleStatus* status = ReadShuttleStatus();
+	if (!status) {
+		// first time
+		Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
+				fCurrentDetector.Data()));
+		status = new AliShuttleStatus(AliShuttleStatus::kStarted);
+		return WriteShuttleStatus(status);
+	}
+
+	// The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
+	// If it happens it may mean Logbook updating failed... let's do it now!
+	if (status->GetStatus() == AliShuttleStatus::kDone ||
+	    status->GetStatus() == AliShuttleStatus::kFailed){
+		Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
+					fCurrentDetector.Data(),
+					status->GetStatusName(status->GetStatus())));
+		UpdateShuttleLogbook(fCurrentDetector.Data(),
+					status->GetStatusName(status->GetStatus()));
+		return kFALSE;
+	}
+
+	// a previous transfer to the main storage failed: only the storing is retried,
+	// the preprocessor is not run again (kFALSE is returned in both cases)
+	if (status->GetStatus() == AliShuttleStatus::kStoreError) {
+		Log("SHUTTLE",
+			Form("ContinueProcessing - %s: Grid storage of one or more objects failed. Trying again now",
+				fCurrentDetector.Data()));
+		UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
+		if (StoreOCDB()){
+			Log("SHUTTLE", Form("ContinueProcessing - %s: all objects successfully stored into main storage",
+				fCurrentDetector.Data()));
+			UpdateShuttleStatus(AliShuttleStatus::kDone);
+			UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE");
+		} else {
+			Log("SHUTTLE",
+				Form("ContinueProcessing - %s: Grid storage failed again",
+					fCurrentDetector.Data()));
+			UpdateShuttleStatus(AliShuttleStatus::kStoreError);
+		}
+		return kFALSE;
+	}
+
+	// if we get here, there is a restart
+	Bool_t cont = kFALSE;
+
+	// abort conditions
+	if (status->GetCount() >= fConfig->GetMaxRetries()) {
+		Log("SHUTTLE", Form("ContinueProcessing - %s failed %d times in status %s - "
+				"Updating Shuttle Logbook", fCurrentDetector.Data(),
+				status->GetCount(), status->GetStatusName()));
+		UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
+		UpdateShuttleStatus(AliShuttleStatus::kFailed);
+
+		// there may still be objects in local OCDB and reference storage
+		// and FXS databases may be not updated: do it now!
+
+		// TODO Currently disabled, we want to keep files in case of failure!
+		// CleanLocalStorage(fgkLocalCDB);
+		// CleanLocalStorage(fgkLocalRefStorage);
+		// UpdateTableFailCase();
+
+		// Send mail to detector expert!
+		AliInfo(Form("Sending mail to %s expert...", fCurrentDetector.Data()));
+		if (!SendMail())
+			Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert",
+					fCurrentDetector.Data()));
+
+	} else {
+		Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. "
+				"Aborted before with %s. Retry number %d.", fCurrentDetector.Data(),
+				status->GetStatusName(), status->GetCount()));
+		// a restart after kDCSError or kDCSStarted does not increase the retry count
+		Bool_t increaseCount = kTRUE;
+		if (status->GetStatus() == AliShuttleStatus::kDCSError || status->GetStatus() == AliShuttleStatus::kDCSStarted)
+			increaseCount = kFALSE;
+		UpdateShuttleStatus(AliShuttleStatus::kStarted, increaseCount);
+		cont = kTRUE;
+	}
+
+	return cont;
+}
+
+//______________________________________________________________________________________________
+Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
+{
+	//
+	// Makes data retrieval for all detectors in the configuration.
+	// entry: Shuttle logbook entry, contains run parameters and status of detectors
+	// (Unprocessed, Inactive, Failed or Done).
+	// For every detector accepted by ContinueProcessing a child process is forked that
+	// runs the preprocessor and the storing; the parent monitors the run time and the
+	// memory consumption of the child and kills it if a limit is exceeded.
+	// Returns kFALSE if an error occurred and kTRUE otherwise
+	//
+
+	if (!entry) return kFALSE;
+
+	fLogbookEntry = entry;
+
+	AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^* \n",
+					GetCurrentRun()));
+
+	// create ML instance that monitors this run
+	fMonaLisa = new TMonaLisaWriter(Form("%d", GetCurrentRun()), "SHUTTLE", "aliendb1.cern.ch");
+	// disable monitoring of other parameters that come e.g. from TFile
+	gMonitoringWriter = 0;
+
+	// Send the information to ML
+	TMonaLisaText mlStatus("SHUTTLE_status", "Processing");
+	TMonaLisaText mlRunType("SHUTTLE_runtype", Form("%s (%s)", entry->GetRunType(), entry->GetRunParameter("log")));
+
+	TList mlList;
+	mlList.Add(&mlStatus);
+	mlList.Add(&mlRunType);
+
+	fMonaLisa->SendParameters(&mlList);
+
+	if (fLogbookEntry->IsDone())
+	{
+		Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook");
+		UpdateShuttleLogbook("shuttle_done");
+
+		// release the ML instance created above, it is not needed any more
+		delete fMonaLisa;
+		fMonaLisa = 0;
+
+		fLogbookEntry = 0;
+		return kTRUE;
+	}
+
+	// read test mode if flag is set
+	if (fReadTestMode)
+	{
+		fTestMode = kNone;
+		TString logEntry(entry->GetRunParameter("log"));
+		//printf("log entry = %s\n", logEntry.Data());
+		TString searchStr("Testmode: ");
+		Int_t pos = logEntry.Index(searchStr.Data());
+		//printf("%d\n", pos);
+		if (pos >= 0)
+		{
+			// the number is expected directly after the search string, up to the next blank
+			TSubString subStr = logEntry(pos + searchStr.Length(), logEntry.Length());
+			//printf("%s\n", subStr.String().Data());
+			TString newStr(subStr.Data());
+			TObjArray* token = newStr.Tokenize(' ');
+			if (token)
+			{
+				//token->Print();
+				TObjString* tmpStr = dynamic_cast<TObjString*> (token->First());
+				if (tmpStr)
+				{
+					Int_t testMode = tmpStr->String().Atoi();
+					if (testMode > 0)
+					{
+						Log("SHUTTLE", Form("Enabling test mode %d", testMode));
+						SetTestMode((TestMode) testMode);
+					}
+				}
+				delete token;
+			}
+		}
+	}
+
+	Log("SHUTTLE", Form("The test mode flag is %d", (Int_t) fTestMode));
+
+	fLogbookEntry->Print("all");
+
+	// Initialization
+	Bool_t hasError = kFALSE;
+
+	AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
+	if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
+	AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
+	if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
+
+	// Loop on detectors in the configuration
+	TIter iter(fConfig->GetDetectors());
+	TObjString* aDetector = 0;
+
+	while ((aDetector = (TObjString*) iter.Next()))
+	{
+		fCurrentDetector = aDetector->String();
+
+		if (ContinueProcessing() == kFALSE) continue;
+
+		AliInfo(Form("\n\n \t\t\t****** run %d - %s: START ******",
+						GetCurrentRun(), aDetector->GetName()));
+
+		for(Int_t iSys=0;iSys<3;iSys++) fFXSCalled[iSys]=kFALSE;
+
+		Log(fCurrentDetector.Data(), "Starting processing");
+
+		Int_t pid = fork();
+
+		if (pid < 0)
+		{
+			Log("SHUTTLE", "ERROR: Forking failed");
+		}
+		else if (pid > 0)
+		{
+			// parent
+			AliInfo(Form("In parent process of %d - %s: Starting monitoring",
+							GetCurrentRun(), aDetector->GetName()));
+
+			Long_t begin = time(0);
+
+			// to be used with waitpid, on purpose an int (not Int_t)!
+			// initialized: waitpid does not fill it if it returns an error and leaves the loop
+			int status = 0;
+			while (waitpid(pid, &status, WNOHANG) == 0)
+			{
+				Long_t expiredTime = time(0) - begin;
+
+				if (expiredTime > fConfig->GetPPTimeOut())
+				{
+					// expiredTime is a Long_t: %ld is needed
+					TString tmp;
+					tmp.Form("Process of %s time out. Run time: %ld seconds. Killing...",
+								fCurrentDetector.Data(), expiredTime);
+					Log("SHUTTLE", tmp);
+					Log(fCurrentDetector, tmp);
+
+					kill(pid, 9);
+
+					UpdateShuttleStatus(AliShuttleStatus::kPPTimeOut);
+					hasError = kTRUE;
+
+					gSystem->Sleep(1000);
+				}
+				else
+				{
+					gSystem->Sleep(1000);
+
+					// ask ps for the virtual memory size of the client
+					TString checkStr;
+					checkStr.Form("ps -o vsize --pid %d | tail -n 1", pid);
+					FILE* pipe = gSystem->OpenPipe(checkStr, "r");
+					if (!pipe)
+					{
+						Log("SHUTTLE", Form("Error: Could not open pipe to %s", checkStr.Data()));
+						continue;
+					}
+
+					char buffer[100];
+					if (!fgets(buffer, 100, pipe))
+					{
+						Log("SHUTTLE", "Error: ps did not return anything");
+						gSystem->ClosePipe(pipe);
+						continue;
+					}
+					gSystem->ClosePipe(pipe);
+
+					//Log("SHUTTLE", Form("ps returned %s", buffer));
+
+					Int_t mem = 0;
+					if ((sscanf(buffer, "%d\n", &mem) != 1) || !mem)
+					{
+						Log("SHUTTLE", "Error: Could not parse output of ps");
+						continue;
+					}
+
+					if (expiredTime % 60 == 0)
+						Log("SHUTTLE", Form("%s: Checking process. Run time: %ld seconds - Memory consumption: %d KB",
+								fCurrentDetector.Data(), expiredTime, mem));
+
+					if (mem > fConfig->GetPPMaxMem())
+					{
+						TString tmp;
+						tmp.Form("Process exceeds maximum allowed memory (%d KB > %d KB). Killing...",
+							mem, fConfig->GetPPMaxMem());
+						Log("SHUTTLE", tmp);
+						Log(fCurrentDetector, tmp);
+
+						kill(pid, 9);
+
+						UpdateShuttleStatus(AliShuttleStatus::kPPOutOfMemory);
+						hasError = kTRUE;
+
+						gSystem->Sleep(1000);
+					}
+				}
+			}
+
+			AliInfo(Form("In parent process of %d - %s: Client has terminated.",
+								GetCurrentRun(), aDetector->GetName()));
+
+			if (WIFEXITED(status))
+			{
+				Int_t returnCode = WEXITSTATUS(status);
+
+				Log("SHUTTLE", Form("%s: the return code is %d", fCurrentDetector.Data(),
+										returnCode));
+
+				// the client exits with its success flag (see below): 0 means failure
+				if (returnCode == 0) hasError = kTRUE;
+			}
+		}
+		else if (pid == 0)
+		{
+			// client
+			AliInfo(Form("In client process of %d - %s", GetCurrentRun(), aDetector->GetName()));
+
+			AliInfo("Redirecting output...");
+
+			if ((freopen(GetLogFileName(fCurrentDetector), "w", stdout)) == 0)
+			{
+				Log("SHUTTLE", "Could not freopen stdout");
+			}
+			else
+			{
+				fOutputRedirected = kTRUE;
+				if ((dup2(fileno(stdout), fileno(stderr))) < 0)
+					Log("SHUTTLE", "Could not redirect stderr");
+
+			}
+
+			Bool_t success = ProcessCurrentDetector();
+			if (success) // Preprocessor finished successfully!
+			{
+				// Update time_processed field in FXS DB
+				// the detector name has to be passed for the %s of the format string
+				if (UpdateTable() == kFALSE)
+					Log("SHUTTLE", Form("Process - %s: Could not update FXS databases!",
+							fCurrentDetector.Data()));
+
+				// Transfer the data from local storage to main storage (Grid)
+				UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
+				if (StoreOCDB() == kFALSE)
+				{
+					AliInfo(Form("\n \t\t\t****** run %d - %s: STORAGE ERROR ****** \n\n",
+							GetCurrentRun(), aDetector->GetName()));
+					UpdateShuttleStatus(AliShuttleStatus::kStoreError);
+					success = kFALSE;
+				} else {
+					AliInfo(Form("\n \t\t\t****** run %d - %s: DONE ****** \n\n",
+							GetCurrentRun(), aDetector->GetName()));
+					UpdateShuttleStatus(AliShuttleStatus::kDone);
+					UpdateShuttleLogbook(fCurrentDetector, "DONE");
+				}
+			}
+
+			for (UInt_t iSys=0; iSys<3; iSys++)
+			{
+				if (fFXSCalled[iSys]) fFXSlist[iSys].Clear();
+			}
+
+			AliInfo(Form("Client process of %d - %s is exiting now with %d.",
+							GetCurrentRun(), aDetector->GetName(), success));
+
+			// the client exits here
+			gSystem->Exit(success);
+
+			AliError("We should never get here!!!");
+		}
+	}
+
+	AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^* \n",
+							GetCurrentRun()));
+
+	//check if shuttle is done for this run, if so update logbook
+	TObjArray checkEntryArray;
+	checkEntryArray.SetOwner(1);
+	TString whereClause = Form("where run=%d", GetCurrentRun());
+	if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || checkEntryArray.GetEntries() == 0) {
+		Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!",
+						GetCurrentRun()));
+
+		// same clean-up as at the regular end of the function
+		delete fMonaLisa;
+		fMonaLisa = 0;
+
+		fLogbookEntry = 0;
+
+		return hasError == kFALSE;
+	}
+
+	AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*>
+						(checkEntryArray.At(0));
+
+	if (checkEntry)
+	{
+		if (checkEntry->IsDone())
+		{
+			Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook");
+			UpdateShuttleLogbook("shuttle_done");
+		}
+		else
+		{
+			for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
+			{
+				if (checkEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
+				{
+					AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
+							checkEntry->GetRun(), GetDetName(iDet)));
+					fFirstUnprocessed[iDet] = kFALSE;
+				}
+			}
+		}
+	}
+
+	// remove ML instance
+	delete fMonaLisa;
+	fMonaLisa = 0;
+
+	fLogbookEntry = 0;
+
+	return hasError == kFALSE;
+}
+
+//______________________________________________________________________________________________
+Bool_t AliShuttle::ProcessCurrentDetector()
+{
+	//
+	// Makes data retrieval just for a specific detector (fCurrentDetector).
+	// There should be a configuration for this detector.
+	// Unless the DCS processing is skipped, all configured aliases and data points are
+	// queried through GetValueSet and collected in a map that is then passed to the
+	// Process function of the preprocessor.
+	// Returns kTRUE if the preprocessor returned 0; kFALSE if a DCS query failed or if
+	// the preprocessor returned a value > 0. The Shuttle status is updated accordingly.
+	//
+
+	AliInfo(Form("Retrieving values for %s, run %d", fCurrentDetector.Data(), GetCurrentRun()));
+
+	TMap dcsMap;
+	dcsMap.SetOwner(1);
+
+	Bool_t aDCSError = kFALSE;
+
+	// call preprocessor
+	// NOTE(review): the pointer is not checked here; ContinueProcessing checks the
+	// same lookup, presumably always before this function is called
+	AliPreprocessor* aPreprocessor =
+		dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
+
+	aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
+
+	Bool_t processDCS = aPreprocessor->ProcessDCS();
+
+	if (!processDCS || (fTestMode & kSkipDCS))
+	{
+		Log(fCurrentDetector, "In TESTMODE - Skipping DCS processing!");
+	}
+	else if (fTestMode & kErrorDCS)
+	{
+		Log(fCurrentDetector, "In TESTMODE - Simulating DCS error");
+		UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
+		UpdateShuttleStatus(AliShuttleStatus::kDCSError);
+		return kFALSE;
+	} else {
+
+		UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
+
+		TString host(fConfig->GetDCSHost(fCurrentDetector));
+		Int_t port = fConfig->GetDCSPort(fCurrentDetector);
+
+		// Retrieval of Aliases
+		TObjString* anAlias = 0;
+		Int_t iAlias = 0;
+		Int_t nTotAliases= ((TMap*)fConfig->GetDCSAliases(fCurrentDetector))->GetEntries();
+		TIter iterAliases(fConfig->GetDCSAliases(fCurrentDetector));
+		while ((anAlias = (TObjString*) iterAliases.Next()))
+		{
+			// 1-based counter, increased for every alias (not only when a message is
+			// printed): the progress is reported for alias 1, 501, 1001, ... and the last one
+			iAlias++;
+
+			TObjArray *valueSet = new TObjArray();
+			valueSet->SetOwner(1);
+
+			if (((iAlias-1) % 500) == 0 || iAlias == nTotAliases)
+				AliInfo(Form("Querying DCS archive: alias %s (%d of %d)",
+						anAlias->GetName(), iAlias, nTotAliases));
+			aDCSError = (GetValueSet(host, port, anAlias->String(), valueSet, kAlias) == 0);
+
+			if(!aDCSError)
+			{
+				dcsMap.Add(anAlias->Clone(), valueSet);
+			} else {
+				Log(fCurrentDetector,
+					Form("ProcessCurrentDetector - Error while retrieving alias %s",
+						anAlias->GetName()));
+				UpdateShuttleStatus(AliShuttleStatus::kDCSError);
+				// valueSet was not added to the map, it has to be deleted here
+				delete valueSet;
+				dcsMap.DeleteAll();
+				return kFALSE;
+			}
+		}
+
+		// Retrieval of Data Points
+		TObjString* aDP = 0;
+		Int_t iDP = 0;
+		Int_t nTotDPs= ((TMap*)fConfig->GetDCSDataPoints(fCurrentDetector))->GetEntries();
+		TIter iterDP(fConfig->GetDCSDataPoints(fCurrentDetector));
+		while ((aDP = (TObjString*) iterDP.Next()))
+		{
+			// same counting as for the aliases
+			iDP++;
+
+			TObjArray *valueSet = new TObjArray();
+			valueSet->SetOwner(1);
+			if (((iDP-1) % 500) == 0 || iDP == nTotDPs)
+				AliInfo(Form("Querying DCS archive: DP %s (%d of %d)",
+						aDP->GetName(), iDP, nTotDPs));
+			aDCSError = (GetValueSet(host, port, aDP->String(), valueSet, kDP) == 0);
+
+			if(!aDCSError)
+			{
+				dcsMap.Add(aDP->Clone(), valueSet);
+			} else {
+				Log(fCurrentDetector,
+					Form("ProcessCurrentDetector - Error while retrieving data point %s",
+						aDP->GetName()));
+				UpdateShuttleStatus(AliShuttleStatus::kDCSError);
+				// valueSet was not added to the map, it has to be deleted here
+				delete valueSet;
+				dcsMap.DeleteAll();
+				return kFALSE;
+			}
+		}
+	}
+
+	// DCS Archive DB processing successful. Call Preprocessor!
+	UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
+
+	UInt_t returnValue = aPreprocessor->Process(&dcsMap);
+
+	if (returnValue > 0) // Preprocessor error!
+	{
+		Log(fCurrentDetector, Form("Preprocessor failed. Process returned %d.", returnValue));
+		UpdateShuttleStatus(AliShuttleStatus::kPPError);
+		dcsMap.DeleteAll();
+		return kFALSE;
+	}
+
+	// preprocessor ok!
+	UpdateShuttleStatus(AliShuttleStatus::kPPDone);
+	Log(fCurrentDetector, Form("ProcessCurrentDetector - %s preprocessor returned success",
+				fCurrentDetector.Data()));
+
+	dcsMap.DeleteAll();
+
+	return kTRUE;
+}
+
+//______________________________________________________________________________________________
+Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
+		TObjArray& entries)
+{
+	// Query DAQ's Shuttle logbook and fills detector status object.
+	// Call QueryRunParameters to query DAQ logbook for run parameters.
+	// whereClause: appended as is to the "select * from <table>" statement
+	// entries: array (set to owner) to which one AliShuttleLogbookEntry per run is added;
+	// runs for which QueryRunParameters returns 0 are skipped
+	// Returns kFALSE if the connection or the query fails or if the result has an
+	// unexpected number of fields; kTRUE otherwise, also if no row matches.
+	//
+
+	entries.SetOwner(1);
+
+	// check connection, in case connect
+	if(!Connect(3)) return kFALSE;
+
+	TString sqlQuery;
+	sqlQuery = Form("select * from %s %s order by run", fConfig->GetShuttlelbTable(), whereClause);
+
+	TSQLResult* aResult = fServer[3]->Query(sqlQuery);
+	if (!aResult) {
+		AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
+		return kFALSE;
+	}
+
+	AliDebug(2,Form("Query = %s", sqlQuery.Data()));
+
+	if(aResult->GetRowCount() == 0) {
+		AliInfo("No entries in Shuttle Logbook match request");
+		delete aResult;
+		return kTRUE;
+	}
+
+	// TODO Check field count!
+	const UInt_t nCols = 22;
+	if (aResult->GetFieldCount() != (Int_t) nCols) {
+		AliError("Invalid SQL result field number!");
+		delete aResult;
+		return kFALSE;
+	}
+
+	TSQLRow* aRow;
+	while ((aRow = aResult->Next())) {
+		TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
+		Int_t run = runString.Atoi();
+
+		AliShuttleLogbookEntry *entry = QueryRunParameters(run);
+		if (!entry) {
+			// the row has to be deleted also if the run is skipped
+			delete aRow;
+			continue;
+		}
+
+		// loop on detectors
+		for(UInt_t ii = 0; ii < nCols; ii++)
+			entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii));
+
+		entries.AddLast(entry);
+		delete aRow;
+	}
+
+	delete aResult;
+	return kTRUE;
+}
+
+//______________________________________________________________________________________________
+AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
+{
+ //
+ // Retrieve run parameters written in the DAQ logbook and sets them into AliShuttleLogbookEntry object
+ // run: run number used in the "where" condition of the query
+ // Returns a newly created entry, or 0 if the connection or the query fails, if there is
+ // not exactly one row for the run, or if start and end time are not valid.
+ // NOTE(review): the caller presumably takes ownership of the returned entry
+ // (QueryShuttleLogbook adds it to an owning array) - confirm for other callers
+ //
+
+ // check connection, in case connect
+ if (!Connect(3))
+ return 0;
+
+ TString sqlQuery;
+ sqlQuery.Form("select * from %s where run=%d", fConfig->GetDAQlbTable(), run);
+
+ TSQLResult* aResult = fServer[3]->Query(sqlQuery);
+ if (!aResult) {
+ AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
+ return 0;
+ }
+
+ if (aResult->GetRowCount() == 0) {
+ Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run));
+ delete aResult;
+ return 0;
+ }
+
+ if (aResult->GetRowCount() > 1) {
+ AliError(Form("More than one entry in DAQ Logbook for run %d. Skipping", run));
+ delete aResult;
+ return 0;
+ }
+
+ TSQLRow* aRow = aResult->Next();
+ if (!aRow)
+ {
+ AliError(Form("Could not retrieve row for run %d. Skipping", run));
+ delete aResult;
return 0;
}
- // Query preparation
- TString sqlQueryStart = "select DAQsource from logbook_fs where";
- TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
- fCurrentRun, GetDetCode(detector), id);
- TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
+ AliShuttleLogbookEntry* entry = new AliShuttleLogbookEntry(run);
+
+ // every field of the row is stored as run parameter under its field name
+ for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
+ entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
+
+ UInt_t startTime = entry->GetStartTime();
+ UInt_t endTime = entry->GetEndTime();
+
+ // both times have to be non-zero and the start must not be after the end
+ if (!startTime || !endTime || startTime > endTime) {
+ Log("SHUTTLE",
+ Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d",
+ run, startTime, endTime));
+ delete entry;
+ delete aRow;
+ delete aResult;
+ return 0;
+ }
+
+ delete aRow;
+ delete aResult;
+
+ return entry;
+}
+
+//______________________________________________________________________________________________
+Bool_t AliShuttle::GetValueSet(const char* host, Int_t port, const char* entry,
+				TObjArray* valueSet, DCSType type)
+{
+	// Queries the DCS server for all values of "entry" between the start and the
+	// end time of the current run
+	// host, port: connection parameters passed to AliDCSClient
+	// entry: name of the alias or data point
+	// valueSet: array that is handed to the client to be filled
+	// type: kAlias or kDP, selects the kind of query; no query is made for other values
+	// Returns kFALSE if the client is not connected or if the query result is negative
+
+	AliDCSClient client(host, port, fTimeout, fRetries);
+	if (!client.IsConnected()) return kFALSE;
+
+	Int_t queryResult = 0;
+
+	switch (type)
+	{
+		case kAlias:
+			queryResult = client.GetAliasValues(entry,
+				GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
+			break;
+
+		case kDP:
+			queryResult = client.GetDPValues(entry,
+				GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
+			break;
+
+		default:
+			break;
+	}
+
+	// anything that is not negative counts as success
+	if (queryResult >= 0) return kTRUE;
+
+	Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get '%s'! Reason: %s",
+			entry, AliDCSClient::GetErrorString(queryResult)));
+
+	// for a server error the message of the server is logged in addition
+	if (queryResult == AliDCSClient::fgkServerError)
+	{
+		Log(fCurrentDetector.Data(), Form("GetValueSet - Server error: %s",
+				client.GetServerError().Data()));
+	}
+
+	return kFALSE;
+}
+
+//______________________________________________________________________________________________
+const char* AliShuttle::GetFile(Int_t system, const char* detector,
+		const char* id, const char* source)
+{
+	// Get calibration file from file exchange servers
+	// First queries the FXS database for the file name, using the run, detector, id and source info
+	// then calls RetrieveFile(filename) for actual copy to local disk
+	// run: current run being processed (given by Logbook entry fLogbookEntry)
+	// detector: the Preprocessor name
+	// id: provided as a parameter by the Preprocessor
+	// source: provided by the Preprocessor through GetFileSources function
+	//
+	// Returns the full path of the local copy, or 0 in case of failure.
+	// The returned pointer refers to a function-static string that is overwritten
+	// by the next successful call.
+
+	// check if test mode should simulate a FXS error
+	if (fTestMode & kErrorFXSFiles)
+	{
+		Log(detector, Form("GetFile - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
+		return 0;
+	}
+
+	// check connection, in case connect
+	if (!Connect(system))
+	{
+		Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
+		return 0;
+	}
+
+	// Query preparation
+	TString sourceName(source);
+	const Int_t nFields = 3; // filePath, size, fileChecksum
+	TString sqlQueryStart = Form("select filePath,size,fileChecksum from %s where",
+								fConfig->GetFXSdbTable(system));
+	TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
+								GetCurrentRun(), detector, id);
+
+	// the source enters the query for DAQ and HLT only; for DCS it is replaced by "none"
+	if (system == kDAQ)
+	{
+		whereClause += Form(" and DAQsource=\"%s\"", source);
+	}
+	else if (system == kDCS)
+	{
+		sourceName="none";
+	}
+	else if (system == kHLT)
+	{
+		whereClause += Form(" and DDLnumbers=\"%s\"", source);
+	}
+
+	TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
+
+	AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
+
+	// Query execution
+	TSQLResult* aResult = fServer[system]->Query(sqlQuery);
+	if (!aResult) {
+		Log(detector, Form("GetFileName - Can't execute SQL query to %s database for: id = %s, source = %s",
+				GetSystemName(system), id, sourceName.Data()));
+		return 0;
+	}
+
+	if(aResult->GetRowCount() == 0)
+	{
+		Log(detector,
+			Form("GetFileName - No entry in %s FXS db for: id = %s, source = %s",
+				GetSystemName(system), id, sourceName.Data()));
+		delete aResult;
+		return 0;
+	}
+
+	if (aResult->GetRowCount() > 1) {
+		Log(detector,
+			Form("GetFileName - More than one entry in %s FXS db for: id = %s, source = %s",
+				GetSystemName(system), id, sourceName.Data()));
+		delete aResult;
+		return 0;
+	}
+
+	if (aResult->GetFieldCount() != nFields) {
+		Log(detector,
+			Form("GetFileName - Wrong field count in %s FXS db for: id = %s, source = %s",
+				GetSystemName(system), id, sourceName.Data()));
+		delete aResult;
+		return 0;
+	}
+
+	TSQLRow* aRow = aResult->Next();
+
+	if (!aRow){
+		Log(detector, Form("GetFileName - Empty set result in %s FXS db from query: id = %s, source = %s",
+				GetSystemName(system), id, sourceName.Data()));
+		delete aResult;
+		return 0;
+	}
+
+	TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
+	TString fileSize(aRow->GetField(1), aRow->GetFieldLength(1));
+	TString fileChecksum(aRow->GetField(2), aRow->GetFieldLength(2));
+
+	delete aResult;
+	delete aRow;
+
+	AliDebug(2, Form("filePath = %s; size = %s, fileChecksum = %s",
+				filePath.Data(), fileSize.Data(), fileChecksum.Data()));
+
+	// retrieved file is renamed to make it unique
+	TString localFileName = Form("%s_%s_%d_%s_%s.shuttle",
+					GetSystemName(system), detector, GetCurrentRun(), id, sourceName.Data());
+
+
+	// file retrieval from FXS
+	UInt_t nRetries = 0;
+	UInt_t maxRetries = 3;
+	Bool_t result = kFALSE;
+
+	// an attempt is successful if the copy works and, in case a checksum is stored
+	// in the database, if the checksum of the local copy matches
+	while(nRetries++ < maxRetries) {
+		AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
+		result = RetrieveFile(system, filePath.Data(), localFileName.Data());
+		if(!result)
+		{
+			Log(detector, Form("GetFileName - Copy of file %s from %s FXS failed",
+					filePath.Data(), GetSystemName(system)));
+			continue;
+		} else {
+			AliInfo(Form("File %s copied from %s FXS into %s/%s",
+						filePath.Data(), GetSystemName(system),
+						GetShuttleTempDir(), localFileName.Data()));
+		}
+
+		if (fileChecksum.Length()>0)
+		{
+			// compare md5sum of local file with the one stored in the FXS DB
+			// stdout has to be redirected before stderr is attached to it, otherwise
+			// the error output of grep is not suppressed
+			// NOTE(review): file name and checksum are inserted unquoted into the shell command
+			Int_t md5Comp = gSystem->Exec(Form("md5sum %s/%s |grep %s > /dev/null 2>&1",
+						GetShuttleTempDir(), localFileName.Data(), fileChecksum.Data()));
+
+			if (md5Comp != 0)
+			{
+				Log(detector, Form("GetFileName - md5sum of file %s does not match with local copy!",
+						filePath.Data()));
+				result = kFALSE;
+				continue;
+			}
+		} else {
+			// logged to the log of "detector" like all other messages of this function
+			Log(detector, Form("GetFile - md5sum of file %s not set in %s database, skipping comparison",
+							filePath.Data(), GetSystemName(system)));
+		}
+		if (result) break;
+	}
+
+	if(!result) return 0;
+
+	// remember that a file of this system was retrieved, together with its id and source
+	fFXSCalled[system]=kTRUE;
+	TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, sourceName.Data()));
+	fFXSlist[system].Add(fileParams);
+
+	// static: the returned pointer has to stay valid after the function returns
+	static TString fullLocalFileName;
+	fullLocalFileName = TString::Format("%s/%s", GetShuttleTempDir(), localFileName.Data());
+
+	AliInfo(Form("fullLocalFileName = %s", fullLocalFileName.Data()));
+
+	return fullLocalFileName.Data();
+
+}
+
+//______________________________________________________________________________________________
+Bool_t AliShuttle::RetrieveFile(UInt_t system, const char* fxsFileName, const char* localFileName)
+{
+	//
+	// Copies a file from the FXS of the given system to the local Shuttle machine
+	//
+	// system        - FXS index (compared against kDAQ, kDCS, kHLT); selects port, user, host
+	//                 and the base folder prepended to fxsFileName
+	// fxsFileName   - path of the file on the FXS, appended to that base folder
+	// localFileName - name of the copy inside the Shuttle temp directory
+	//
+	// Returns kTRUE if the scp command exits with status 0; kFALSE if it does not, or if the
+	// Shuttle temp directory is missing and cannot be created.
+	//
+
+	// arguments follow the order of the placeholders: file name first, then system name
+	AliDebug(2, Form("Copy file %s from %s FXS into %s/%s",
+			fxsFileName, GetSystemName(system), GetShuttleTempDir(), localFileName));
+
+	// check temp directory: try to open it; if it does not exist, create it
+	void* dir = gSystem->OpenDirectory(GetShuttleTempDir());
+	if (dir == NULL) {
+		if (gSystem->mkdir(GetShuttleTempDir(), kTRUE)) {
+			AliError(Form("Can't open directory <%s>", GetShuttleTempDir()));
+			return kFALSE;
+		}
+	} else {
+		gSystem->FreeDirectory(dir);
+	}
+
+	// base folder on the remote side; it stays empty for kDCS and for any other value
+	TString baseFXSFolder;
+	if (system == kDAQ)
+	{
+		baseFXSFolder = "FES/";
+	}
+	else if (system == kHLT)
+	{
+		baseFXSFolder = "~/";
+	}
+
+	TString command = Form("scp -oPort=%d -2 %s@%s:%s%s %s/%s",
+		fConfig->GetFXSPort(system),
+		fConfig->GetFXSUser(system),
+		fConfig->GetFXSHost(system),
+		baseFXSFolder.Data(),
+		fxsFileName,
+		GetShuttleTempDir(),
+		localFileName);
+
+	AliDebug(2, Form("%s",command.Data()));
+
+	// the copy succeeded only if the command returned 0
+	return (gSystem->Exec(command.Data()) == 0);
+}
+
+//______________________________________________________________________________________________
+TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
+{
+	//
+	// Get sources producing the condition file Id from file exchange servers
+	//
+	// system   - FXS index; only kDAQ and kHLT are accepted
+	// detector - detector name used in the query and as log target
+	// id       - file id used in the query
+	//
+	// Returns a new TList (owner of its TObjString elements, one per result row) that the
+	// caller has to delete, or 0 in case of error or if the query returns no rows.
+	//
+
+	// check if test mode should simulate a FXS error
+	if (fTestMode & kErrorFXSSources)
+	{
+		Log(detector, Form("GetFileSources - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
+		return 0;
+	}
+
+	if (system == kDCS)
+	{
+		AliError("DCS system has only one source of data!");
+		return NULL;
+	}
+
+	// name of the table column holding the source; only defined for DAQ and HLT
+	TString sourceName;
+	if (system == kDAQ)
+	{
+		sourceName = "DAQsource";
+	}
+	else if (system == kHLT)
+	{
+		sourceName = "DDLnumbers";
+	}
+	else
+	{
+		// without a column name the query below would be malformed ("select  from ...")
+		Log(detector, Form("GetFileSources - Unsupported system index %d", system));
+		return 0;
+	}
+
+	// check connection, in case connect
+	if (!Connect(system))
+	{
+		Log(detector, Form("GetFileSources - Couldn't connect to %s FXS database", GetSystemName(system)));
+		return NULL;
+	}
+
+	TString sqlQueryStart = Form("select %s from %s where", sourceName.Data(), fConfig->GetFXSdbTable(system));
+	TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
+				GetCurrentRun(), detector, id);
+	TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
+
+	AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
+
+	// Query execution
+	TSQLResult* aResult = fServer[system]->Query(sqlQuery);
+	if (!aResult) {
+		Log(detector, Form("GetFileSources - Can't execute SQL query to %s database for id: %s",
+				GetSystemName(system), id));
+		return 0;
+	}
+
+	if (aResult->GetRowCount() == 0)
+	{
+		Log(detector,
+			Form("GetFileSources - No entry in %s FXS table for id: %s", GetSystemName(system), id));
+		delete aResult;
+		return 0;
+	}
+
+	TList *list = new TList();
+	list->SetOwner(1);
+
+	// one list entry per returned row; each row object is deleted after use
+	TSQLRow* aRow;
+	while ((aRow = aResult->Next()))
+	{
+		TString source(aRow->GetField(0), aRow->GetFieldLength(0));
+		AliDebug(2, Form("%s = %s", sourceName.Data(), source.Data()));
+		list->Add(new TObjString(source));
+		delete aRow;
+	}
+
+	delete aResult;
+
+	return list;
+}
+
+//______________________________________________________________________________________________
+Bool_t AliShuttle::Connect(Int_t system)
+{
+	// Connect to MySQL Server of the system's FXS MySQL databases
+	// DAQ Logbook, Shuttle Logbook and DAQ FXS db are on the same host
+	//
+	// system < 3 selects the FXS database of that system, any other value the Run logbook.
+	// Returns kTRUE if fServer[system] is connected on exit, kFALSE otherwise; after a
+	// failure fServer[system] is 0.
+	//
+
+	// check connection: if already connected return
+	if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
+
+	// a server object that exists but is no longer connected would be leaked when the
+	// pointer is overwritten below: release it first
+	if(fServer[system]) {
+		delete fServer[system];
+		fServer[system] = 0;
+	}
+
+	TString dbHost, dbUser, dbPass, dbName;
- AliInfo(Form("SQL query: \n%s",sqlQuery.Data()));
+	if (system < 3) // FXS db servers
+	{
+		dbHost = Form("mysql://%s:%d", fConfig->GetFXSdbHost(system), fConfig->GetFXSdbPort(system));
+		dbUser = fConfig->GetFXSdbUser(system);
+		dbPass = fConfig->GetFXSdbPass(system);
+		dbName = fConfig->GetFXSdbName(system);
+	} else { // Run & Shuttle logbook servers
+		// TODO Will the Shuttle logbook server be the same as the Run logbook server ???
+		dbHost = Form("mysql://%s:%d", fConfig->GetDAQlbHost(), fConfig->GetDAQlbPort());
+		dbUser = fConfig->GetDAQlbUser();
+		dbPass = fConfig->GetDAQlbPass();
+		dbName = fConfig->GetDAQlbDB();
+	}
- // Query execution
- TSQLResult* aResult;
- aResult = fServer[kDAQ]->Query(sqlQuery);
- if (!aResult) {
- Log(detector, Form("GetDAQFileSources: Can't execute query <%s>!", sqlQuery.Data()));
- return 0;
+	fServer[system] = TSQLServer::Connect(dbHost.Data(), dbUser.Data(), dbPass.Data());
+	if (!fServer[system] || !fServer[system]->IsConnected()) {
+		if(system < 3)
+		{
+			AliError(Form("Can't establish connection to FXS database for %s",
+					AliShuttleInterface::GetSystemName(system)));
+		} else {
+			AliError("Can't establish connection to Run logbook.");
+		}
+		if(fServer[system]) {
+			delete fServer[system];
+			// reset the pointer: the check at the top of the next call dereferences it
+			fServer[system] = 0;
+		}
+		return kFALSE;
}
- if (aResult->GetRowCount() == 0) {
- Log(detector,
- Form("GetDAQFileSources: No result from SQL query <%s>!", sqlQuery.Data()));
- delete aResult;
- return 0;
+	// Get tables
+	TSQLResult* aResult=0;
+	switch(system){
+		case kDAQ:
+			aResult = fServer[kDAQ]->GetTables(dbName.Data());
+			break;
+		case kDCS:
+			aResult = fServer[kDCS]->GetTables(dbName.Data());
+			break;
+		case kHLT:
+			aResult = fServer[kHLT]->GetTables(dbName.Data());
+			break;
+		default:
+			aResult = fServer[3]->GetTables(dbName.Data());
+			break;
}
- TSQLRow* aRow;
- TList *list = new TList();
- list->SetOwner(1);
+	// the table list itself is not used, only released
+	delete aResult;
+	return kTRUE;
+}
+
+//______________________________________________________________________________________________
+Bool_t AliShuttle::UpdateTable()
+{
+	//
+	// Update FXS table filling time_processed field in all rows corresponding to current run and detector
+	//
+	// Only systems flagged in fFXSCalled are handled. For each of them one update query is
+	// sent per entry of fFXSlist[system]; an entry must split into exactly two tokens
+	// (file id and source) on the characters of "#!?!#".
+	// Returns kFALSE if any connection, entry or query failed, kTRUE otherwise.
+	//
+
+	Bool_t result = kTRUE;
+
+	for (UInt_t system=0; system<3; system++)
+	{
+		if(!fFXSCalled[system]) continue;
- while((aRow = aResult->Next())){
+		// check connection, in case connect
+		if (!Connect(system))
+		{
+			Log(fCurrentDetector, Form("UpdateTable - Couldn't connect to %s FXS database", GetSystemName(system)));
+			result = kFALSE;
+			continue;
+		}
- TString daqSource(aRow->GetField(0), aRow->GetFieldLength(0));
- AliInfo(Form("daqSource = %s", daqSource.Data()));
- list->Add(new TObjString(daqSource));
+		TTimeStamp now; // now
+
+		// Loop on FXS list entries
+		TIter iter(&fFXSlist[system]);
+		TObjString *aFXSentry=0;
+		while ((aFXSentry = dynamic_cast<TObjString*> (iter.Next())))
+		{
+			TString aFXSentrystr = aFXSentry->String();
+			TObjArray *aFXSarray = aFXSentrystr.Tokenize("#!?!#");
+			if (!aFXSarray || aFXSarray->GetEntries() != 2 )
+			{
+				Log(fCurrentDetector, Form("UpdateTable - error updating %s FXS entry. Check string: <%s>",
+						GetSystemName(system), aFXSentrystr.Data()));
+				if(aFXSarray) delete aFXSarray;
+				result = kFALSE;
+				continue;
+			}
+			const char* fileId = ((TObjString*) aFXSarray->At(0))->GetName();
+			const char* source = ((TObjString*) aFXSarray->At(1))->GetName();
+
+			// the source column is named differently per system; DCS rows have none
+			TString whereClause;
+			if (system == kDAQ)
+			{
+				whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
+						GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
+			}
+			else if (system == kDCS)
+			{
+				whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\";",
+						GetCurrentRun(), fCurrentDetector.Data(), fileId);
+			}
+			else if (system == kHLT)
+			{
+				whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\";",
+						GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
+			}
+
+			// fileId and source point into aFXSarray: delete it only after they were formatted
+			delete aFXSarray;
+
+			// GetSec() is a time_t: convert explicitly so the argument matches the conversion
+			TString sqlQuery = Form("update %s set time_processed=%ld %s", fConfig->GetFXSdbTable(system),
+					static_cast<Long_t>(now.GetSec()), whereClause.Data());
+
+			AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
+
+			// Query execution
+			TSQLResult* aResult;
+			aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
+			if (!aResult)
+			{
+				Log(fCurrentDetector, Form("UpdateTable - %s db: can't execute SQL query <%s>",
+						GetSystemName(system), sqlQuery.Data()));
+				result = kFALSE;
+				continue;
+			}
+			delete aResult;
+		}
}
- delete aResult;
- return list;
+	return result;
+}
+
+//______________________________________________________________________________________________
+Bool_t AliShuttle::UpdateTableFailCase()
+{
+	// Update FXS table filling time_processed field in all rows corresponding to current run and detector.
+	// This is called in case the preprocessor is declared failed for the current run, because
+	// UpdateTable() fills the field only for the files that were actually retrieved.
+	//
+	// Unlike UpdateTable(), all three FXS databases are updated, with one query per system
+	// that selects on run and detector only.
+	// Returns kFALSE if any connection or query failed, kTRUE otherwise.
+
+	Bool_t result = kTRUE;
+
+	for (UInt_t system=0; system<3; system++)
+	{
+		// check connection, in case connect
+		if (!Connect(system))
+		{
+			Log(fCurrentDetector, Form("UpdateTableFailCase - Couldn't connect to %s FXS database",
+							GetSystemName(system)));
+			result = kFALSE;
+			continue;
+		}
+
+		TTimeStamp now; // now
+
+		TString whereClause = Form("where run=%d and detector=\"%s\";",
+						GetCurrentRun(), fCurrentDetector.Data());
+
+		// GetSec() is a time_t: convert explicitly so the argument matches the conversion
+		TString sqlQuery = Form("update %s set time_processed=%ld %s", fConfig->GetFXSdbTable(system),
+							static_cast<Long_t>(now.GetSec()), whereClause.Data());
+
+		AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
+
+		// Query execution
+		TSQLResult* aResult;
+		aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
+		if (!aResult)
+		{
+			Log(fCurrentDetector, Form("UpdateTableFailCase - %s db: can't execute SQL query <%s>",
+							GetSystemName(system), sqlQuery.Data()));
+			result = kFALSE;
+			continue;
+		}
+		delete aResult;
+	}
+	return result;
}
//______________________________________________________________________________________________
-Bool_t AliShuttle::UpdateDAQTable(const char* detector){
-// Update DAQ table filling time_processed field in all rows corresponding to current run and detector
+Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
+{
+ //
+ // Update Shuttle logbook filling detector or shuttle_done column
+ // ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
+ //
+ // detector - name of the column to set, or the special value "shuttle_done"
+ // status - value written to the detector column; it must contain "done" or "failed"
+ // (case insensitive) and is not read when detector is "shuttle_done"
+ //
+ // The update applies to the row of the current run. Returns kFALSE if the logbook
+ // connection fails, the status is not accepted or the query fails; kTRUE otherwise.
+ //
// check connection, in case connect
- if(!Connect(kDAQ)){
- Log(detector, "UpdateDAQTable: Couldn't connect to DAQ Logbook !");
+ // index 3 is the slot Connect() uses for the Run/Shuttle logbook server
+ if(!Connect(3)){
+ Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook.");
return kFALSE;
}
- TTimeStamp now; // now
+ TString detName(detector);
+ TString setClause;
+ if(detName == "shuttle_done")
+ {
+ setClause = "set shuttle_done=1";
+
+ // Send the information to ML
+ TMonaLisaText mlStatus("SHUTTLE_status", "Done");
- // Loop on FES list entries
- TIter iter(&fFESlist[kDAQ]);
- TObjString *aFESentry=0;
- while((aFESentry = dynamic_cast<TObjString*> (iter.Next()))){
- TString aFESentrystr = aFESentry->String();
- TObjArray *aFESarray = aFESentrystr.Tokenize("_!?!_");
- if(!aFESarray || aFESarray->GetEntries() != 2 ) {
- Log(detector,Form("UpdateDAQTable: error updating FES entry! string = %s",
- aFESentrystr.Data()));
- if(aFESarray) delete aFESarray;
+ TList mlList;
+ mlList.Add(&mlStatus);
+
+ fMonaLisa->SendParameters(&mlList);
+ } else {
+ // only statuses containing "done" or "failed" are written to the logbook
+ TString statusStr(status);
+ if(statusStr.Contains("done", TString::kIgnoreCase) ||
+ statusStr.Contains("failed", TString::kIgnoreCase)){
+ setClause = Form("set %s=\"%s\"", detector, status);
+ } else {
+ Log("SHUTTLE",
+ Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s",
+ status, detector));
return kFALSE;
}
- const char* fileId = ((TObjString*) aFESarray->At(0))->GetName();
- const char* daqSource = ((TObjString*) aFESarray->At(1))->GetName();
- TString whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
- fCurrentRun,GetDetCode(detector), fileId, daqSource);
+ }
- delete aFESarray;
+ TString whereClause = Form("where run=%d", GetCurrentRun());
- TString sqlQuery = Form("update logbook_fs set time_processed=%d %s", now.GetSec(), whereClause.Data());
+ TString sqlQuery = Form("update %s %s %s",
+ fConfig->GetShuttlelbTable(), setClause.Data(), whereClause.Data());
- AliInfo(Form("SQL query: \n%s",sqlQuery.Data()));
+ AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
- // Query execution
- TSQLResult* aResult;
- aResult = dynamic_cast<TSQLResult*> (fServer[kDAQ]->Query(sqlQuery));
- if (!aResult) {
- Log(detector, Form("UpdateDAQTable: Can't execute query <%s>!", sqlQuery.Data()));
- return kFALSE;
- }
- delete aResult;
+ // Query execution
+ TSQLResult* aResult;
+ aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
+ if (!aResult) {
+ Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data()));
+ return kFALSE;
+ }
+ delete aResult;
- // check result - TODO Is it necessary?
- sqlQuery = Form("select time_processed from logbook_fs %s", whereClause.Data());
- AliInfo(Form(" CHECK - SQL query: \n%s",sqlQuery.Data()));
+ return kTRUE;
+}
- aResult = dynamic_cast<TSQLResult*> (fServer[kDAQ]->Query(sqlQuery));
- if (!aResult) {
- AliWarning("Can't check result!");
- continue;
+//______________________________________________________________________________________________
+Int_t AliShuttle::GetCurrentRun() const
+{
+	//
+	// Run number of the current logbook entry, -1 if no entry is set
+	//
+
+	if (!fLogbookEntry)
+		return -1;
+
+	return fLogbookEntry->GetRun();
+}
+
+//______________________________________________________________________________________________
+UInt_t AliShuttle::GetCurrentStartTime() const
+{
+	//
+	// Start time of the current logbook entry, 0 if no entry is set
+	//
+
+	if (!fLogbookEntry)
+		return 0;
+
+	return fLogbookEntry->GetStartTime();
+}
+
+//______________________________________________________________________________________________
+UInt_t AliShuttle::GetCurrentEndTime() const
+{
+	//
+	// End time of the current logbook entry, 0 if no entry is set
+	//
+
+	if (!fLogbookEntry)
+		return 0;
+
+	return fLogbookEntry->GetEndTime();
+}
+
+//______________________________________________________________________________________________
+void AliShuttle::Log(const char* detector, const char* message)
+{
+ //
+ // Fill log string with a message
+ //
+ // The line "<time> (<pid>): <detector> - [run <n> - ]<message>" is sent to AliInfo and
+ // appended to the file returned by GetLogFileName(detector). The file is not written
+ // when fOutputRedirected is set and detector is not "SHUTTLE".
+ // The log directory is created if it does not exist; if that fails nothing is logged.
+ //
+
+ void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
+ if (dir == NULL) {
+ if (gSystem->mkdir(GetShuttleLogDir(), kTRUE)) {
+ AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
+ return;
}
- if (aResult->GetRowCount() == 0) {
- Log(detector,
- Form("GetDAQFileName: No result from SQL query <%s>!", sqlQuery.Data()));
- delete aResult;
- //return 0;
+ } else {
+ gSystem->FreeDirectory(dir);
}
- if (aResult->GetRowCount() >1) {
- Log(detector,
- Form("GetDAQFileName: More than one row resulting from SQL query <%s>!", sqlQuery.Data()));
- delete aResult;
- //return 0;
+ // the run number is added only when a run is set (GetCurrentRun() >= 0)
+ TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
+ if (GetCurrentRun() >= 0)
+ toLog += Form("run %d - ", GetCurrentRun());
+ toLog += Form("%s", message);
+
+ AliInfo(toLog.Data());
+
+ // if we redirect the log output already to the file, leave here
+ if (fOutputRedirected && strcmp(detector, "SHUTTLE") != 0)
+ return;
+
+ TString fileName = GetLogFileName(detector);
+
+ gSystem->ExpandPathName(fileName);
+
+ // open in append mode so that earlier messages are kept
+ ofstream logFile;
+ logFile.open(fileName, ofstream::out | ofstream::app);
+
+ if (!logFile.is_open()) {
+ AliError(Form("Could not open file %s", fileName.Data()));
+ return;
+ }
+
+ logFile << toLog.Data() << "\n";
+
+ logFile.close();
+}
+
+//______________________________________________________________________________________________
+TString AliShuttle::GetLogFileName(const char* detector) const
+{
+	//
+	// Builds the log file name of a sub detector inside the Shuttle log directory:
+	// "<detector>_<run>.log" when a run is set, "<detector>.log" otherwise
+	//
+
+	TString logName;
+
+	if (GetCurrentRun() < 0)
+	{
+		logName.Form("%s/%s.log", GetShuttleLogDir(), detector);
+		return logName;
+	}
+
+	logName.Form("%s/%s_%d.log", GetShuttleLogDir(), detector, GetCurrentRun());
+	return logName;
+}
+
+//______________________________________________________________________________________________
+Bool_t AliShuttle::Collect(Int_t run)
+{
+ //
+ // Collects conditions data for all UNPROCESSED run written to DAQ LogBook in case of run = -1 (default)
+ // If a dedicated run is given this run is processed
+ //
+ // In operational mode, this is the Shuttle function triggered by the EOR signal.
+ //
+ // Returns kFALSE if the Shuttle logbook cannot be queried or if RetrieveConditionsData
+ // reports a failure; kTRUE otherwise, including when there is nothing to process.
+ //
+
+ if (run == -1)
+ Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs");
+ else
+ Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run));
+
+ SetLastAction("Starting");
+
+ // select the runs not yet flagged as done, restricted to the requested run if one is given
+ TString whereClause("where shuttle_done=0");
+ if (run != -1)
+ whereClause += Form(" and run=%d", run);
+
+ TObjArray shuttleLogbookEntries;
+ if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries))
+ {
+ Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
+ return kFALSE;
}
- TSQLRow *row = dynamic_cast<TSQLRow*> (aResult->Next());
- TString processedTimeString(row->GetField(0), row->GetFieldLength(0));
- Int_t processedTime = processedTimeString.Atoi();
- if(processedTime != now.GetSec()){
- Log(detector, Form("UpdateDAQTable: Update table error: processed_time=%d, now=%d !",
- processedTime, now.GetSec()));
- delete aResult;
+ if (shuttleLogbookEntries.GetEntries() == 0)
+ {
+ if (run == -1)
+ Log("SHUTTLE","Collect - Found no UNPROCESSED runs in Shuttle logbook");
+ else
+ Log("SHUTTLE", Form("Collect - Run %d is already DONE "
+ "or it does not exist in Shuttle logbook", run));
+ return kTRUE;
+ }
+
+ // start from "first unprocessed" for every detector; cleared below where needed
+ for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
+ fFirstUnprocessed[iDet] = kTRUE;
+
+ if (run != -1)
+ {
+ // query Shuttle logbook for earlier runs, check if some detectors are unprocessed,
+ // flag them into fFirstUnprocessed array
+ // (this whereClause shadows the one declared above, which is no longer needed here)
+ TString whereClause(Form("where shuttle_done=0 and run < %d", run));
+ TObjArray tmpLogbookEntries;
+ if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries))
+ {
+ Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
return kFALSE;
}
- delete aResult;
+ TIter iter(&tmpLogbookEntries);
+ AliShuttleLogbookEntry* anEntry = 0;
+ while ((anEntry = dynamic_cast<AliShuttleLogbookEntry*> (iter.Next())))
+ {
+ for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
+ {
+ if (anEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
+ {
+ AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
+ anEntry->GetRun(), GetDetName(iDet)));
+ fFirstUnprocessed[iDet] = kFALSE;
+ }
+ }
+ }
+
+ }
+
+ if (!RetrieveConditionsData(shuttleLogbookEntries))
+ {
+ Log("SHUTTLE", "Collect - Process of at least one run failed");
+ return kFALSE;
}
+ Log("SHUTTLE", "Collect - Requested run(s) successfully processed");
return kTRUE;
}
//______________________________________________________________________________________________
-const char* AliShuttle::GetDCSFileName(const char* /*detector*/, const char* /*id*/, const char* /*source*/){
-// Retrieves a file from the DCS FES.
+Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries)
+{
+	//
+	// Calls Process() for every logbook entry of dateEntries and removes the *.shuttle
+	// files from the Shuttle temp directory after each of them.
+	// Returns kTRUE only if Process() succeeded for all entries.
+	//
+
+	Bool_t allProcessed = kTRUE;
+
+	TIter next(&dateEntries);
+	for (TObject* obj = next.Next(); obj != 0; obj = next.Next())
+	{
+		AliShuttleLogbookEntry* entry = (AliShuttleLogbookEntry*) obj;
+
+		// a failing run does not stop the loop, it is only remembered
+		if (!Process(entry))
+			allProcessed = kFALSE;
+
+		// clean SHUTTLE temp directory
+		TString pattern = Form("%s/*.shuttle", GetShuttleTempDir());
+		RemoveFile(pattern.Data());
+	}
+
+	return allProcessed;
+}
+
+//______________________________________________________________________________________________
+ULong_t AliShuttle::GetTimeOfLastAction() const
+{
+ //
+ // Gets time of last action
+ //
+ // Returns the value of fLastActionTime, read while fMonitoringMutex is locked.
+ //
+
+ ULong_t tmp;
+
+ fMonitoringMutex->Lock();
+
+ // copy the value so that it can be returned after the mutex is released
+ tmp = fLastActionTime;
-return "You're in DCS";
+ fMonitoringMutex->UnLock();
+ return tmp;
}
//______________________________________________________________________________________________
-TList* AliShuttle::GetDCSFileSources(const char* /*detector*/, const char* /*id*/){
-// Retrieves a file from the DCS FES.
+const TString AliShuttle::GetLastAction() const
+{
+ //
+ // returns a string description of the last action
+ //
+ // The returned TString is a copy of fLastAction made while fMonitoringMutex is locked.
+ //
+
+ TString tmp;
-return NULL;
+ fMonitoringMutex->Lock();
+
+ // copy the string so that it can be returned after the mutex is released
+ tmp = fLastAction;
+
+ fMonitoringMutex->UnLock();
+ return tmp;
}
//______________________________________________________________________________________________
-const char* AliShuttle::GetHLTFileName(const char* /*detector*/, const char* /*id*/, const char* /*source*/){
-// Retrieves a file from the HLT FES.
+void AliShuttle::SetLastAction(const char* action)
+{
+ //
+ // updates the monitoring variables
+ //
+ // Stores action in fLastAction and the current time (time(0)) in fLastActionTime;
+ // both assignments are done while fMonitoringMutex is locked.
+ //
-return "You're in HLT";
+ fMonitoringMutex->Lock();
+ fLastAction = action;
+ fLastActionTime = time(0);
+
+ fMonitoringMutex->UnLock();
}
//______________________________________________________________________________________________
-TList* AliShuttle::GetHLTFileSources(const char* /*detector*/, const char* /*id*/){
-// Retrieves a file from the HLT FES.
+const char* AliShuttle::GetRunParameter(const char* param)
+{
+ //
+ // returns run parameter read from DAQ logbook
+ //
+ // The lookup is forwarded to the current logbook entry (fLogbookEntry);
+ // 0 is returned, after an error message, if no entry is set.
+ //
-return NULL;
+ if(!fLogbookEntry) {
+ AliError("No logbook entry!");
+ return 0;
+ }
+ return fLogbookEntry->GetRunParameter(param);
}
//______________________________________________________________________________________________
-const char* AliShuttle::GetDetCode(const char* detector){
-// Return detector code
+AliCDBEntry* AliShuttle::GetFromOCDB(const char* detector, const AliCDBPath& path)
+{
+ //
+ // returns object from OCDB valid for current run
+ //
+ // detector - only used as target of the log messages
+ // path - path of the object, queried in the storage fgkMainCDB for GetCurrentRun()
+ //
+ // Returns 0 if the kErrorOCDB test-mode bit is set or if the storage is not available.
+ //
- for(int iDet=0; iDet < fgkNDetectors; iDet++){
- if(!strcmp(fgkDetectorName[iDet], detector)) return fgkDetectorCode[iDet];
+ if (fTestMode & kErrorOCDB)
+ {
+ Log(detector, "GetFromOCDB - In TESTMODE - Simulating error with OCDB");
+ return 0;
+ }
+
+ AliCDBStorage *sto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
+ if (!sto)
+ {
+ Log(detector, "GetFromOCDB - Cannot activate main OCDB for query!");
+ return 0;
}
- return 0;
+ return dynamic_cast<AliCDBEntry*> (sto->Get(path, GetCurrentRun()));
}
//______________________________________________________________________________________________
-void AliShuttle::Log(const char* detector, const char* message)
+Bool_t AliShuttle::SendMail()
{
-// Fill log string with a message
+	//
+	// sends a mail to the subdetector expert in case of preprocessor error
+	//
+	// The mail body is written to <ShuttleLogDir>/mail.body: an introduction, the last
+	// 10 lines of the log file of the current detector and run, and a closing text.
+	// It is sent with the "mail" command to the responsibles configured for
+	// fCurrentDetector.
+	//
+	// Returns kTRUE if the mail command exits with status 0. In any test mode nothing
+	// is sent and kTRUE is returned. Returns kFALSE if the body file cannot be written,
+	// if there is no usable recipient or if the mail command fails.
+	//
+
+	if (fTestMode != kNone)
+		return kTRUE;
+
+	void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
+	if (dir == NULL)
+	{
+		if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
+		{
+			AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
+			return kFALSE;
+		}
+
+	} else {
+		gSystem->FreeDirectory(dir);
+	}
+
+	TString bodyFileName;
+	bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
+	gSystem->ExpandPathName(bodyFileName);
+
+	ofstream mailBody;
+	mailBody.open(bodyFileName, ofstream::out);
+
+	if (!mailBody.is_open())
+	{
+		AliError(Form("Could not open mail body file %s", bodyFileName.Data()));
+		return kFALSE;
+	}
+
+	// comma separated list of recipients
+	TString to="";
+	TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
+	TObjString *anExpert=0;
+	while ((anExpert = (TObjString*) iterExperts.Next()))
+	{
+		to += Form("%s,", anExpert->GetName());
+	}
+	if (to.Length() == 0)
+	{
+		// no trailing comma to strip and nobody to send the mail to
+		AliError(Form("No responsibles found for detector %s, mail not sent",
+				fCurrentDetector.Data()));
+		return kFALSE;
+	}
+	to.Remove(to.Length()-1); // strip the trailing comma
+	AliDebug(2, Form("to: %s",to.Data()));
+
+	// TODO this will be removed...
+	if (to.Contains("not_yet_set")) {
+		AliInfo("List of detector responsibles not yet set!");
+		return kFALSE;
+	}
+
+	TString cc="alberto.colla@cern.ch";
+
+	TString subject = Form("%s Shuttle preprocessor error in run %d !",
+				fCurrentDetector.Data(), GetCurrentRun());
+	AliDebug(2, Form("subject: %s", subject.Data()));
+
+	TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
+	body += Form("SHUTTLE just detected that your preprocessor "
+			"FAILED after %d retries in run %d!!\n\n", fConfig->GetMaxRetries(), GetCurrentRun());
+	body += Form("Please check %s status on the web page asap!\n\n", fCurrentDetector.Data());
+	// the %s placeholder needs its argument: the detector whose log is quoted below
+	body += Form("The last 10 lines of %s log file are following:\n\n", fCurrentDetector.Data());
+
+	AliDebug(2, Form("Body begin: %s", body.Data()));
+
+	// close the stream so that the introduction is on disk before tail appends to the
+	// same file, then reopen it in append mode for the closing text
+	mailBody << body.Data();
+	mailBody.close();
+	mailBody.open(bodyFileName, ofstream::out | ofstream::app);
+
+	TString logFileName = Form("%s/%s_%d.log", GetShuttleLogDir(), fCurrentDetector.Data(), GetCurrentRun());
+	TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
+	if (gSystem->Exec(tailCommand.Data()))
+	{
+		mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
+	}
+
+	TString endBody = Form("------------------------------------------------------\n\n");
+	endBody += Form("In case of problems please contact the SHUTTLE core team.\n\n");
+	endBody += "Please do not answer this message directly, it is automatically generated.\n\n";
+	endBody += "Sincerely yours,\n\n \t\t\tthe SHUTTLE\n";
+
+	AliDebug(2, Form("Body end: %s", endBody.Data()));
+
+	mailBody << endBody.Data();
+
+	mailBody.close();
- TString toLog = Form("%s - %s", detector, message);
- AliError(toLog.Data());
+	// send mail!
+	TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
+						subject.Data(),
+						cc.Data(),
+						to.Data(),
+						bodyFileName.Data());
+	AliDebug(2, Form("mail command: %s", mailCommand.Data()));
- fLog += toLog;
- fLog += "\n";
+	// keep the exit status as an integer: 0 means the mail command succeeded
+	Int_t mailStatus = gSystem->Exec(mailCommand.Data());
+	return mailStatus == 0;
}
//______________________________________________________________________________________________
-void AliShuttle::StoreLog(Int_t run)
+const char* AliShuttle::GetRunType()
{
-// store error log string to SHUTTLE/SYSTEM/ERROR (on local storage)
+ //
+ // returns run type read from "run type" logbook
+ //
+ // The value is taken from the current logbook entry (fLogbookEntry);
+ // 0 is returned, after an error message, if no entry is set.
+ //
+
+ if(!fLogbookEntry) {
+ AliError("No logbook entry!");
+ return 0;
+ }
- AliInfo("Printing fLog...");
- AliInfo(fLog.Data());
- // Storing log string for runs with errors in "SHUTTLE/SYSTEM/ERRORLOGS"
- TObjString *logString = new TObjString(fLog);
- AliCDBId badRunId("SHUTTLE/SYSTEM/ERRORLOGS",run,run);
- AliCDBMetaData metaData;
- AliCDBManager::Instance()->GetStorage(fgkLocalUri)
- ->Put(logString, badRunId,&metaData);
- delete logString;
+ return fLogbookEntry->GetRunType();
+}
+//______________________________________________________________________________________________
+void AliShuttle::SetShuttleTempDir(const char* tmpDir)
+{
+	//
+	// Stores tmpDir, after expansion by TSystem::ExpandPathName, as Shuttle temp directory
+	//
+
+	// NOTE(review): this overload hands back a buffer; who releases it depends on the
+	// type of fgkShuttleTempDir, which is declared elsewhere - confirm
+	const char* expandedDir = gSystem->ExpandPathName(tmpDir);
+	fgkShuttleTempDir = expandedDir;
}
+//______________________________________________________________________________________________
+void AliShuttle::SetShuttleLogDir(const char* logDir)
+{
+	//
+	// Stores logDir, after expansion by TSystem::ExpandPathName, as Shuttle log directory
+	//
+
+	// NOTE(review): this overload hands back a buffer; who releases it depends on the
+	// type of fgkShuttleLogDir, which is declared elsewhere - confirm
+	const char* expandedDir = gSystem->ExpandPathName(logDir);
+	fgkShuttleLogDir = expandedDir;
+}