X-Git-Url: http://git.uio.no/git/?p=u%2Fmrichter%2FAliRoot.git;a=blobdiff_plain;f=SHUTTLE%2FAliShuttle.cxx;h=b53b2396250d94e55929e101155dd380981cf9a0;hp=82f7399847c321b7e49c0bde58d21f04f47cf741;hb=b5f1c82e3e379712ea6385fbe24ff7c1b2f7a246;hpb=4a33bdd9e61f94345997f3a1c011de115d18f197 diff --git a/SHUTTLE/AliShuttle.cxx b/SHUTTLE/AliShuttle.cxx index 82f7399847c..b53b2396250 100644 --- a/SHUTTLE/AliShuttle.cxx +++ b/SHUTTLE/AliShuttle.cxx @@ -15,6 +15,141 @@ /* $Log$ +Revision 1.72 2007/12/13 15:44:28 acolla +Run type added in mail sent to detector expert (eases understanding) + +Revision 1.71 2007/12/12 14:56:14 jgrosseo +sending shuttle_ignore to ML also in case of 0 events + +Revision 1.70 2007/12/12 13:45:35 acolla +Monalisa started in Collect() function. Alive message to monitor is sent at each Collect and every minute during preprocessor processing. + +Revision 1.69 2007/12/12 10:06:29 acolla +in AliShuttle.cxx: SHUTTLE logbook is updated in case of invalid run times: + +time_start==0 && time_end==0 + +logbook is NOT updated if time_start != 0 && time_end == 0, because it may mean that the run is still ongoing. 
+ +Revision 1.68 2007/12/11 10:15:17 acolla +Added marking SHUTTLE=DONE for invalid runs +(invalid start time or end time) and runs with totalEvents < 1 + +Revision 1.67 2007/12/07 19:14:36 acolla +in AliShuttleTrigger: + +Added automatic collection of new runs on a regular time basis (settable from the configuration) + +in AliShuttleConfig: new members + +- triggerWait: time to wait for DIM trigger (s) before starting automatic collection of new runs +- mode: run mode (test, prod) -> used to build log folder (logs or logs_PROD) + +in AliShuttle: + +- logs now stored in logs/#RUN/DET_#RUN.log + +Revision 1.66 2007/12/05 10:45:19 jgrosseo +changed order of arguments to TMonaLisaWriter + +Revision 1.65 2007/11/26 16:58:37 acolla +Monalisa configuration added: host and table name + +Revision 1.64 2007/11/13 16:15:47 acolla +DCS map is stored in a file in the temp folder where the detector is processed. +If the preprocessor fails, the temp folder is not removed. This will help the debugging of the problem. + +Revision 1.63 2007/11/02 10:53:16 acolla +Protection added to AliShuttle::CopyFileLocally + +Revision 1.62 2007/10/31 18:23:13 acolla +Further development on the Shuttle: + +- Shuttle now connects to the Grid as alidaq. The OCDB and Reference folders +are now built from /alice/data, e.g.: +/alice/data/2007/LHC07a/OCDB + +the year and LHC period are taken from the Shuttle. +Raw metadata files are stored by GRP to: +/alice/data/2007/LHC07a//Raw/RunMetadata.root + +- Shuttle sends a mail to DCS experts each time DP retrieval fails. + +Revision 1.61 2007/10/30 20:33:51 acolla +Improved managing of temporary folders, which weren't correctly handled. +Resolved bug introduced in StoreReferenceFile, which caused the SPD preprocessor to fail. + +Revision 1.60 2007/10/29 18:06:16 acolla + +New function StoreRunMetadataFile added to preprocessor and Shuttle interface +This function can be used by GRP only. It stores raw data tags merged file to the +raw data folder (e.g. 
/alice/data/2008/LHC08a/000099999/Raw). + +KNOWN ISSUES: + +1. Shuttle cannot write to /alice/data/ because it belongs to alidaq. Tag file is stored in /alice/simulation/... for the time being. +2. Due to a bug in TAlien::Mkdir, the creation of a folder in recursive mode (-p option) does not work. The problem +has been corrected in the root package on the Shuttle machine. + +Revision 1.59 2007/10/05 12:40:55 acolla + +Result error code added to AliDCSClient data members (it was "lost" with the new implementation of TMap* GetAliasValues and GetDPValues). + +Revision 1.58 2007/09/28 15:27:40 acolla + +AliDCSClient "multiSplit" option added in the DCS configuration +in AliDCSMessage: variable MAX_BODY_SIZE set to 500000 + +Revision 1.57 2007/09/27 16:53:13 acolla +Detectors can have more than one AMANDA server. SHUTTLE queries the servers sequentially, +merges the dcs aliases/DPs in one TMap and sends it to the preprocessor. + +Revision 1.56 2007/09/14 16:46:14 jgrosseo +1) Connect and Close are called before and after each query, so one can +keep the same AliDCSClient object. +2) The splitting of a query is moved to GetDPValues/GetAliasValues. +3) Splitting interval can be specified in constructor + +Revision 1.55 2007/08/06 12:26:40 acolla +Function Bool_t GetHLTStatus added to preprocessor. It returns the status of HLT +read from the run logbook. + +Revision 1.54 2007/07/12 09:51:25 jgrosseo +removed duplicated log message in GetFile + +Revision 1.53 2007/07/12 09:26:28 jgrosseo +updating hlt fxs base path + +Revision 1.52 2007/07/12 08:06:45 jgrosseo +adding log messages in getfile... functions +adding not implemented copy constructor in alishuttleconfigholder + +Revision 1.51 2007/07/03 17:24:52 acolla +root moved to v5-16-00. TFileMerger->Cp moved to TFile::Cp. + +Revision 1.50 2007/07/02 17:19:32 acolla +preprocessor is run in a temp directory that is removed when process is finished. 
+ +Revision 1.49 2007/06/29 10:45:06 acolla +Number of columns in MySql Shuttle logbook increased by one (HLT added) + +Revision 1.48 2007/06/21 13:06:19 acolla +GetFileSources returns dummy list with 1 source if system=DCS (better than +returning error as it was) + +Revision 1.47 2007/06/19 17:28:56 acolla +HLT updated; missing map bug removed. + +Revision 1.46 2007/06/09 13:01:09 jgrosseo +Switching to retrieval of several DCS DPs at a time (multiDPrequest) + +Revision 1.45 2007/05/30 06:35:20 jgrosseo +Adding functionality to the Shuttle/TestShuttle: +o) Function to retrieve list of sources from a given system (GetFileSources with id=0) +o) Function to retrieve list of IDs for a given source (GetFileIDs) +These functions are needed for dealing with the tag files that are saved for the GRP preprocessor +Example code has been added to the TestProcessor in TestShuttle + Revision 1.44 2007/05/11 16:09:32 acolla Reference files for ITS, MUON and PHOS are now stored in OfflineDetName/OnlineDetName/run_... 
example: ITS/SPD/100_filename.root @@ -233,7 +368,7 @@ some docs added #include #include #include -#include +#include #include #include @@ -436,16 +571,23 @@ Bool_t AliShuttle::StoreOCDB() return kFALSE; } - Log("SHUTTLE","Storing OCDB data ..."); + Log("SHUTTLE","StoreOCDB - Storing OCDB data ..."); Bool_t resultCDB = StoreOCDB(fgkMainCDB); - Log("SHUTTLE","Storing reference data ..."); + Log("SHUTTLE","StoreOCDB - Storing reference data ..."); Bool_t resultRef = StoreOCDB(fgkMainRefStorage); - Log("SHUTTLE","Storing reference files ..."); - Bool_t resultRefFiles = StoreRefFilesToGrid(); + Log("SHUTTLE","StoreOCDB - Storing reference files ..."); + Bool_t resultRefFiles = CopyFilesToGrid("reference"); + + Bool_t resultMetadata = kTRUE; + if(fCurrentDetector == "GRP") + { + Log("StoreOCDB - SHUTTLE","Storing Run Metadata file ..."); + resultMetadata = CopyFilesToGrid("metadata"); + } - return resultCDB && resultRef && resultRefFiles; + return resultCDB && resultRef && resultRefFiles && resultMetadata; } //______________________________________________________________________________________________ @@ -549,7 +691,7 @@ Bool_t AliShuttle::StoreOCDB(const TString& gridURI) // removing local filename... 
TString filename; localSto->IdToFilename(aLocId, filename); - AliInfo(Form("Removing local file %s", filename.Data())); + Log("SHUTTLE", Form("StoreOCDB - Removing local file %s", filename.Data())); RemoveFile(filename.Data()); continue; } else { @@ -578,7 +720,7 @@ Bool_t AliShuttle::CleanReferenceStorage(const char* detector) TString targetDir = GetRefFilePrefix(localBaseFolder.Data(), detector); - Log("SHUTTLE", Form("Cleaning %s", targetDir.Data())); + Log("SHUTTLE", Form("CleanReferenceStorage - Cleaning %s", targetDir.Data())); TString begin; begin.Form("%d_", GetCurrentRun()); @@ -620,7 +762,7 @@ Bool_t AliShuttle::CleanReferenceStorage(const char* detector) if (result) { - Log("SHUTTLE", Form("Could not delete file %s!", fileName.Data())); + Log("SHUTTLE", Form("CleanReferenceStorage - Could not delete file %s!", fileName.Data())); success = kFALSE; } else { nDel++; @@ -644,10 +786,10 @@ Bool_t AliShuttle::CleanReferenceStorage(const char* detector) if (result == 0) { // delete directory - result = gSystem->Exec(Form("rm -r %s", targetDir.Data())); + result = gSystem->Exec(Form("rm -rf %s", targetDir.Data())); if (result != 0) { - Log("SHUTTLE", Form("StoreReferenceFile - Could not clear directory %s", targetDir.Data())); + Log("SHUTTLE", Form("CleanReferenceStorage - Could not clean directory %s", targetDir.Data())); return kFALSE; } } @@ -655,7 +797,7 @@ Bool_t AliShuttle::CleanReferenceStorage(const char* detector) result = gSystem->mkdir(targetDir, kTRUE); if (result != 0) { - Log("SHUTTLE", Form("StoreReferenceFile - Error creating base directory %s", targetDir.Data())); + Log("SHUTTLE", Form("CleanReferenceStorage - Error creating base directory %s", targetDir.Data())); return kFALSE; } @@ -684,74 +826,167 @@ Bool_t AliShuttle::StoreReferenceFile(const char* detector, const char* localFil TString localBaseFolder = sto->GetBaseFolder(); - TString targetDir = GetRefFilePrefix(localBaseFolder.Data(), detector); + TString target = 
GetRefFilePrefix(localBaseFolder.Data(), detector); + target.Append(Form("/%d_%s", GetCurrentRun(), gridFileName)); - //try to open folder, if does not exist + return CopyFileLocally(localFile, target); +} + +//______________________________________________________________________________________________ +Bool_t AliShuttle::StoreRunMetadataFile(const char* localFile, const char* gridFileName) +{ + // + // Stores Run metadata file to the Grid, in the run folder + // + // Only GRP can call this function. + + if (fTestMode & kErrorStorage) + { + Log(fCurrentDetector, "StoreRunMetaDataFile - In TESTMODE - Simulating error while storing locally"); + return kFALSE; + } + + AliCDBManager* man = AliCDBManager::Instance(); + AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage); + + TString localBaseFolder = sto->GetBaseFolder(); + + // Build Run level folder + // folder = /alice/data/year/lhcPeriod/runNb/Raw + + + TString lhcPeriod = GetLHCPeriod(); + if (lhcPeriod.Length() == 0) + { + Log("SHUTTLE","StoreRunMetaDataFile - LHCPeriod not found in logbook!"); + return 0; + } + + TString target = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/Raw/%s", + localBaseFolder.Data(), GetCurrentYear(), + lhcPeriod.Data(), GetCurrentRun(), gridFileName); + + return CopyFileLocally(localFile, target); +} + +//______________________________________________________________________________________________ +Bool_t AliShuttle::CopyFileLocally(const char* localFile, const TString& target) +{ + // + // Stores file locally. Called by StoreReferenceFile and StoreRunMetadataFile + // Files are temporarily stored in the local reference storage. 
When the preprocessor + // finishes, the Shuttle calls CopyFilesToGrid to transfer the files to AliEn + // (in reference or run level folders) + // + + TString targetDir(target(0, target.Last('/'))); + + //try to open base dir folder, if it does not exist void* dir = gSystem->OpenDirectory(targetDir.Data()); if (dir == NULL) { if (gSystem->mkdir(targetDir.Data(), kTRUE)) { - Log("SHUTTLE", Form("Can't open directory <%s>", targetDir.Data())); + Log("SHUTTLE", Form("StoreFileLocally - Can't open directory <%s>", targetDir.Data())); return kFALSE; } } else { gSystem->FreeDirectory(dir); } - - TString target; - target.Form("%s/%d_%s", targetDir.Data(), GetCurrentRun(), gridFileName); - Int_t result = gSystem->GetPathInfo(localFile, 0, (Long64_t*) 0, 0, 0); + Int_t result = 0; + + result = gSystem->GetPathInfo(localFile, 0, (Long64_t*) 0, 0, 0); if (result) { - Log("SHUTTLE", Form("StoreReferenceFile - %s does not exist", localFile)); + Log("SHUTTLE", Form("StoreFileLocally - %s does not exist", localFile)); return kFALSE; } + result = gSystem->GetPathInfo(target, 0, (Long64_t*) 0, 0, 0); + if (!result) + { + Log("SHUTTLE", Form("StoreFileLocally - target file %s already exist, removing...", target.Data())); + if (gSystem->Unlink(target.Data())) + { + Log("SHUTTLE", Form("StoreFileLocally - Could not remove existing target file %s!", target.Data())); + return kFALSE; + } + } + result = gSystem->CopyFile(localFile, target); if (result == 0) { - Log("SHUTTLE", Form("StoreReferenceFile - File %s stored locally to %s", localFile, target.Data())); + Log("SHUTTLE", Form("StoreFileLocally - File %s stored locally to %s", localFile, target.Data())); return kTRUE; } else { - Log("SHUTTLE", Form("StoreReferenceFile - Could not store file %s to %s!. Error code = %d", + Log("SHUTTLE", Form("StoreFileLocally - Could not store file %s to %s! 
Error code = %d", localFile, target.Data(), result)); return kFALSE; } + + + } //______________________________________________________________________________________________ -Bool_t AliShuttle::StoreRefFilesToGrid() +Bool_t AliShuttle::CopyFilesToGrid(const char* type) { // - // Transfers the reference file to the Grid. + // Transfers local files to the Grid. Local files can be reference files + // or run metadata file (from GRP only). // - // The files are stored under the following location: - // //_ + // According to the type (ref, metadata) the files are stored under the following location: + // ref --> //_ + // metadata --> / // - + AliCDBManager* man = AliCDBManager::Instance(); AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage); if (!sto) return kFALSE; TString localBaseFolder = sto->GetBaseFolder(); - - TString dir = GetRefFilePrefix(localBaseFolder.Data(), fCurrentDetector.Data()); - - AliCDBStorage* gridSto = man->GetStorage(fgkMainRefStorage); - if (!gridSto) - return kFALSE; - - TString gridBaseFolder = gridSto->GetBaseFolder(); - - TString alienDir = GetRefFilePrefix(gridBaseFolder.Data(), fCurrentDetector.Data()); + TString dir; + TString alienDir; TString begin; - begin.Form("%d_", GetCurrentRun()); + if (strcmp(type, "reference") == 0) + { + dir = GetRefFilePrefix(localBaseFolder.Data(), fCurrentDetector.Data()); + AliCDBStorage* gridSto = man->GetStorage(fgkMainRefStorage); + if (!gridSto) + return kFALSE; + TString gridBaseFolder = gridSto->GetBaseFolder(); + alienDir = GetRefFilePrefix(gridBaseFolder.Data(), fCurrentDetector.Data()); + begin = Form("%d_", GetCurrentRun()); + } + else if (strcmp(type, "metadata") == 0) + { + + TString lhcPeriod = GetLHCPeriod(); + + if (lhcPeriod.Length() == 0) + { + Log("SHUTTLE","CopyFilesToGrid - LHCPeriod not found in logbook!"); + return 0; + } + + dir = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/Raw", + localBaseFolder.Data(), GetCurrentYear(), + lhcPeriod.Data(), GetCurrentRun()); + alienDir = 
dir(dir.Index("/alice/data/"), dir.Length()); + + begin = ""; + } + else + { + Log("SHUTTLE", "CopyFilesToGrid - Unexpected: type label must be reference or metadata!"); + return kFALSE; + } + TSystemDirectory* baseDir = new TSystemDirectory("/", dir); if (!baseDir) return kTRUE; @@ -769,7 +1004,7 @@ Bool_t AliShuttle::StoreRefFilesToGrid() if (!gGrid) { - Log("SHUTTLE", "Connection to Grid failed: Cannot continue!"); + Log("SHUTTLE", "CopyFilesToGrid - Connection to Grid failed: Cannot continue!"); delete dirList; return kFALSE; } @@ -795,7 +1030,7 @@ Bool_t AliShuttle::StoreRefFilesToGrid() if (first) { first = kFALSE; - // check that DET folder exists, otherwise create it + // check that folder exists, otherwise create it TGridResult* result = gGrid->Ls(alienDir.Data(), "a"); if (!result) @@ -806,18 +1041,20 @@ Bool_t AliShuttle::StoreRefFilesToGrid() if (!result->GetFileName(1)) // TODO: It looks like element 0 is always 0!! { - if (!gGrid->Mkdir(alienDir.Data(),"",0)) + // TODO It does not work currently! 
Bug in TAliEn::Mkdir + // TODO Manually fixed in local root v5-16-00 + if (!gGrid->Mkdir(alienDir.Data(),"-p",0)) { - Log("SHUTTLE", Form("StoreRefFilesToGrid - Cannot create directory %s", + Log("SHUTTLE", Form("CopyFilesToGrid - Cannot create directory %s", alienDir.Data())); delete dirList; return kFALSE; } else { - Log("SHUTTLE",Form("Folder %s created", alienDir.Data())); + Log("SHUTTLE",Form("CopyFilesToGrid - Folder %s created", alienDir.Data())); } } else { - Log("SHUTTLE",Form("Folder %s found", alienDir.Data())); + Log("SHUTTLE",Form("CopyFilesToGrid - Folder %s found", alienDir.Data())); } } @@ -827,23 +1064,25 @@ Bool_t AliShuttle::StoreRefFilesToGrid() TString fullGridPath; fullGridPath.Form("alien://%s/%s", alienDir.Data(), fileName.Data()); - TFileMerger fileMerger; - Bool_t result = fileMerger.Cp(fullLocalPath, fullGridPath); + Bool_t result = TFile::Cp(fullLocalPath, fullGridPath); if (result) { - Log("SHUTTLE", Form("StoreRefFilesToGrid - Copying local file %s to %s succeeded!", fullLocalPath.Data(), fullGridPath.Data())); + Log("SHUTTLE", Form("CopyFilesToGrid - Copying local file %s to %s succeeded!", + fullLocalPath.Data(), fullGridPath.Data())); RemoveFile(fullLocalPath); nTransfer++; } else { - Log("SHUTTLE", Form("StoreRefFilesToGrid - Copying local file %s to %s FAILED!", fullLocalPath.Data(), fullGridPath.Data())); + Log("SHUTTLE", Form("CopyFilesToGrid - Copying local file %s to %s FAILED!", + fullLocalPath.Data(), fullGridPath.Data())); success = kFALSE; } } - Log("SHUTTLE", Form("StoreRefFilesToGrid - %d (over %d) reference files in folder %s copied to Grid.", nTransfer, nDirs, dir.Data())); + Log("SHUTTLE", Form("CopyFilesToGrid - %d (over %d) files in folder %s copied to Grid.", + nTransfer, nDirs, dir.Data())); delete dirList; @@ -870,6 +1109,7 @@ const char* AliShuttle::GetRefFilePrefix(const char* base, const char* detector) } + //______________________________________________________________________________________________ void 
AliShuttle::CleanLocalStorage(const TString& uri) { @@ -900,9 +1140,9 @@ void AliShuttle::CleanLocalStorage(const TString& uri) TString filename(Form("%s/%s/*/Run*_v%d_s*.root", localSto->GetBaseFolder().Data(), GetOfflineDetName(fCurrentDetector.Data()), GetCurrentRun())); - AliInfo(Form("filename = %s", filename.Data())); + AliDebug(2, Form("filename = %s", filename.Data())); - AliInfo(Form("Removing remaining local files from run %d and detector %s ...", + Log("SHUTTLE", Form("Removing remaining local files for run %d and detector %s ...", GetCurrentRun(), fCurrentDetector.Data())); RemoveFile(filename.Data()); @@ -1000,7 +1240,7 @@ void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t AliShuttleStatus* status = dynamic_cast (fStatusEntry->GetObject()); if (!status){ - Log("SHUTTLE", "UNEXPECTED: status could not be read from current CDB entry"); + Log("SHUTTLE", "UpdateShuttleStatus - UNEXPECTED: status could not be read from current CDB entry"); return; } @@ -1040,7 +1280,9 @@ void AliShuttle::SendMLInfo() mlList.Add(&mlStatus); mlList.Add(&mlRetryCount); - fMonaLisa->SendParameters(&mlList); + TString mlID; + mlID.Form("%d", GetCurrentRun()); + fMonaLisa->SendParameters(&mlList, mlID); } //______________________________________________________________________________________________ @@ -1056,7 +1298,7 @@ Bool_t AliShuttle::ContinueProcessing() dynamic_cast (fPreprocessorMap.GetValue(fCurrentDetector)); if (!aPreprocessor) { - AliInfo(Form("%s: no preprocessor registered", fCurrentDetector.Data())); + Log("SHUTTLE", Form("ContinueProcessing - %s: no preprocessor registered", fCurrentDetector.Data())); return kFALSE; } @@ -1064,7 +1306,7 @@ Bool_t AliShuttle::ContinueProcessing() fLogbookEntry->GetDetectorStatus(fCurrentDetector); if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) { - AliInfo(Form("ContinueProcessing - %s is %s", + Log("SHUTTLE", Form("ContinueProcessing - %s is %s", fCurrentDetector.Data(), 
fLogbookEntry->GetDetectorStatusName(entryStatus))); return kFALSE; @@ -1078,12 +1320,16 @@ Bool_t AliShuttle::ContinueProcessing() { if (fTestMode == kNone) { - Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering but this is not the first unprocessed run!")); + Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering" + " but this is not the first unprocessed run!")); return kFALSE; } else { - Log("SHUTTLE", Form("ContinueProcessing - In TESTMODE - Although %s requires strict run ordering and this is not the first unprocessed run, the SHUTTLE continues")); + Log("SHUTTLE", Form("ContinueProcessing - In TESTMODE - " + "Although %s requires strict run ordering " + "and this is not the first unprocessed run, " + "the SHUTTLE continues")); } } @@ -1110,14 +1356,14 @@ Bool_t AliShuttle::ContinueProcessing() if (status->GetStatus() == AliShuttleStatus::kStoreError) { Log("SHUTTLE", - Form("ContinueProcessing - %s: Grid storage of one or more objects failed. Trying again now", + Form("ContinueProcessing - %s: Grid storage of one or more " + "objects failed. Trying again now", fCurrentDetector.Data())); UpdateShuttleStatus(AliShuttleStatus::kStoreStarted); if (StoreOCDB()){ - Log("SHUTTLE", Form("ContinueProcessing - %s: all objects successfully stored into main storage", + Log("SHUTTLE", Form("ContinueProcessing - %s: all objects " + "successfully stored into main storage", fCurrentDetector.Data())); - UpdateShuttleStatus(AliShuttleStatus::kDone); - UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE"); } else { Log("SHUTTLE", Form("ContinueProcessing - %s: Grid storage failed again", @@ -1147,7 +1393,8 @@ Bool_t AliShuttle::ContinueProcessing() // UpdateTableFailCase(); // Send mail to detector expert! 
- AliInfo(Form("Sending mail to %s expert...", fCurrentDetector.Data())); + Log("SHUTTLE", Form("ContinueProcessing - Sending mail to %s expert...", + fCurrentDetector.Data())); if (!SendMail()) Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert", fCurrentDetector.Data())); @@ -1157,8 +1404,10 @@ Bool_t AliShuttle::ContinueProcessing() "Aborted before with %s. Retry number %d.", fCurrentDetector.Data(), status->GetStatusName(), status->GetCount())); Bool_t increaseCount = kTRUE; - if (status->GetStatus() == AliShuttleStatus::kDCSError || status->GetStatus() == AliShuttleStatus::kDCSStarted) - increaseCount = kFALSE; + if (status->GetStatus() == AliShuttleStatus::kDCSError || + status->GetStatus() == AliShuttleStatus::kDCSStarted) + increaseCount = kFALSE; + UpdateShuttleStatus(AliShuttleStatus::kStarted, increaseCount); cont = kTRUE; } @@ -1180,14 +1429,9 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry) fLogbookEntry = entry; - AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^* \n", + Log("SHUTTLE", Form("\t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^*", GetCurrentRun())); - // create ML instance that monitors this run - fMonaLisa = new TMonaLisaWriter(Form("%d", GetCurrentRun()), "SHUTTLE", "aliendb1.cern.ch"); - // disable monitoring of other parameters that come e.g. 
from TFile - gMonitoringWriter = 0; - // Send the information to ML TMonaLisaText mlStatus("SHUTTLE_status", "Processing"); TMonaLisaText mlRunType("SHUTTLE_runtype", Form("%s (%s)", entry->GetRunType(), entry->GetRunParameter("log"))); @@ -1196,7 +1440,9 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry) mlList.Add(&mlStatus); mlList.Add(&mlRunType); - fMonaLisa->SendParameters(&mlList); + TString mlID; + mlID.Form("%d", GetCurrentRun()); + fMonaLisa->SendParameters(&mlList, mlID); if (fLogbookEntry->IsDone()) { @@ -1230,7 +1476,7 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry) Int_t testMode = tmpStr->String().Atoi(); if (testMode > 0) { - Log("SHUTTLE", Form("Enabling test mode %d", testMode)); + Log("SHUTTLE", Form("Process - Enabling test mode %d", testMode)); SetTestMode((TestMode) testMode); } } @@ -1238,14 +1484,28 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry) } } } - - Log("SHUTTLE", Form("The test mode flag is %d", (Int_t) fTestMode)); - + fLogbookEntry->Print("all"); // Initialization Bool_t hasError = kFALSE; + // Set the CDB and Reference folders according to the year and LHC period + TString lhcPeriod(GetLHCPeriod()); + if (lhcPeriod.Length() == 0) + { + Log("SHUTTLE","Process - LHCPeriod not found in logbook!"); + return 0; + } + + if (fgkMainCDB.Length() == 0) + fgkMainCDB = Form("alien://folder=/alice/data/%d/%s/OCDB?user=alidaq?cacheFold=/tmp/OCDBCache", + GetCurrentYear(), lhcPeriod.Data()); + + if (fgkMainRefStorage.Length() == 0) + fgkMainRefStorage = Form("alien://folder=/alice/data/%d/%s/Reference?user=alidaq?cacheFold=/tmp/OCDBCache", + GetCurrentYear(), lhcPeriod.Data()); + AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB); if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun()); AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage); @@ -1261,23 +1521,23 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry) if (ContinueProcessing() == 
kFALSE) continue; - AliInfo(Form("\n\n \t\t\t****** run %d - %s: START ******", + Log("SHUTTLE", Form("\t\t\t****** run %d - %s: START ******", GetCurrentRun(), aDetector->GetName())); for(Int_t iSys=0;iSys<3;iSys++) fFXSCalled[iSys]=kFALSE; - Log(fCurrentDetector.Data(), "Starting processing"); + Log(fCurrentDetector.Data(), "Process - Starting processing"); Int_t pid = fork(); if (pid < 0) { - Log("SHUTTLE", "ERROR: Forking failed"); + Log("SHUTTLE", "Process - ERROR: Forking failed"); } else if (pid > 0) { // parent - AliInfo(Form("In parent process of %d - %s: Starting monitoring", + Log("SHUTTLE", Form("Process - In parent process of %d - %s: Starting monitoring", GetCurrentRun(), aDetector->GetName())); Long_t begin = time(0); @@ -1290,8 +1550,9 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry) if (expiredTime > fConfig->GetPPTimeOut()) { TString tmp; - tmp.Form("Process of %s time out. Run time: %d seconds. Killing...", - fCurrentDetector.Data(), expiredTime); + tmp.Form("Process - Process of %s time out. " + "Run time: %d seconds. 
Killing...", + fCurrentDetector.Data(), expiredTime); Log("SHUTTLE", tmp); Log(fCurrentDetector, tmp); @@ -1311,14 +1572,15 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry) FILE* pipe = gSystem->OpenPipe(checkStr, "r"); if (!pipe) { - Log("SHUTTLE", Form("Error: Could not open pipe to %s", checkStr.Data())); + Log("SHUTTLE", Form("Process - Error: " + "Could not open pipe to %s", checkStr.Data())); continue; } char buffer[100]; if (!fgets(buffer, 100, pipe)) { - Log("SHUTTLE", "Error: ps did not return anything"); + Log("SHUTTLE", "Process - Error: ps did not return anything"); gSystem->ClosePipe(pipe); continue; } @@ -1329,18 +1591,23 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry) Int_t mem = 0; if ((sscanf(buffer, "%d\n", &mem) != 1) || !mem) { - Log("SHUTTLE", "Error: Could not parse output of ps"); + Log("SHUTTLE", "Process - Error: Could not parse output of ps"); continue; } if (expiredTime % 60 == 0) - Log("SHUTTLE", Form("%s: Checking process. Run time: %d seconds - Memory consumption: %d KB", - fCurrentDetector.Data(), expiredTime, mem)); + { + Log("SHUTTLE", Form("Process - %s: Checking process. " + "Run time: %d seconds - Memory consumption: %d KB", + fCurrentDetector.Data(), expiredTime, mem)); + SendAlive(); + } if (mem > fConfig->GetPPMaxMem()) { TString tmp; - tmp.Form("Process exceeds maximum allowed memory (%d KB > %d KB). Killing...", + tmp.Form("Process - Process exceeds maximum allowed memory " + "(%d KB > %d KB). 
Killing...", mem, fConfig->GetPPMaxMem()); Log("SHUTTLE", tmp); Log(fCurrentDetector, tmp); @@ -1355,14 +1622,14 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry) } } - AliInfo(Form("In parent process of %d - %s: Client has terminated.", + Log("SHUTTLE", Form("Process - In parent process of %d - %s: Client has terminated.", GetCurrentRun(), aDetector->GetName())); if (WIFEXITED(status)) { Int_t returnCode = WEXITSTATUS(status); - Log("SHUTTLE", Form("%s: the return code is %d", fCurrentDetector.Data(), + Log("SHUTTLE", Form("Process - %s: the return code is %d", fCurrentDetector.Data(), returnCode)); if (returnCode == 0) hasError = kTRUE; @@ -1371,43 +1638,82 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry) else if (pid == 0) { // client - AliInfo(Form("In client process of %d - %s", GetCurrentRun(), aDetector->GetName())); + Log("SHUTTLE", Form("Process - In client process of %d - %s", GetCurrentRun(), + aDetector->GetName())); - AliInfo("Redirecting output..."); + Log("SHUTTLE", Form("Process - Redirecting output to %s log",fCurrentDetector.Data())); if ((freopen(GetLogFileName(fCurrentDetector), "a", stdout)) == 0) { - Log("SHUTTLE", "Could not freopen stdout"); + Log("SHUTTLE", "Process - Could not freopen stdout"); } else { fOutputRedirected = kTRUE; if ((dup2(fileno(stdout), fileno(stderr))) < 0) - Log("SHUTTLE", "Could not redirect stderr"); + Log("SHUTTLE", "Process - Could not redirect stderr"); } + TString wd = gSystem->WorkingDirectory(); + TString tmpDir = Form("%s/%s_%d_process", GetShuttleTempDir(), + fCurrentDetector.Data(), GetCurrentRun()); + + Int_t result = gSystem->GetPathInfo(tmpDir.Data(), 0, (Long64_t*) 0, 0, 0); + if (!result) // temp dir already exists! + { + Log(fCurrentDetector.Data(), + Form("Process - %s dir already exists! 
Removing...", tmpDir.Data())); + gSystem->Exec(Form("rm -rf %s",tmpDir.Data())); + } + + if (gSystem->mkdir(tmpDir.Data(), 1)) + { + Log(fCurrentDetector.Data(), "Process - could not make temp directory!!"); + gSystem->Exit(1); + } + + if (!gSystem->ChangeDirectory(tmpDir.Data())) + { + Log(fCurrentDetector.Data(), "Process - could not change directory!!"); + gSystem->Exit(1); + } + Bool_t success = ProcessCurrentDetector(); + + gSystem->ChangeDirectory(wd.Data()); + if (success) // Preprocessor finished successfully! { + // remove temporary folder + gSystem->Exec(Form("rm -rf %s",tmpDir.Data())); + // Update time_processed field in FXS DB if (UpdateTable() == kFALSE) - Log("SHUTTLE", Form("Process - %s: Could not update FXS databases!")); + Log("SHUTTLE", Form("Process - %s: Could not update FXS databases!", + fCurrentDetector.Data())); // Transfer the data from local storage to main storage (Grid) UpdateShuttleStatus(AliShuttleStatus::kStoreStarted); if (StoreOCDB() == kFALSE) { - AliInfo(Form("\n \t\t\t****** run %d - %s: STORAGE ERROR ****** \n\n", + Log("SHUTTLE", + Form("\t\t\t****** run %d - %s: STORAGE ERROR ******", GetCurrentRun(), aDetector->GetName())); UpdateShuttleStatus(AliShuttleStatus::kStoreError); success = kFALSE; } else { - AliInfo(Form("\n \t\t\t****** run %d - %s: DONE ****** \n\n", + Log("SHUTTLE", + Form("\t\t\t****** run %d - %s: DONE ******", GetCurrentRun(), aDetector->GetName())); UpdateShuttleStatus(AliShuttleStatus::kDone); UpdateShuttleLogbook(fCurrentDetector, "DONE"); } + } else + { + Log("SHUTTLE", + Form("\t\t\t****** run %d - %s: PP ERROR ******", + GetCurrentRun(), aDetector->GetName())); } for (UInt_t iSys=0; iSys<3; iSys++) @@ -1415,7 +1721,7 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry) if (fFXSCalled[iSys]) fFXSlist[iSys].Clear(); } - AliInfo(Form("Client process of %d - %s is exiting now with %d.", + Log("SHUTTLE", Form("Process - Client process of %d - %s is exiting now with %d.", GetCurrentRun(), 
aDetector->GetName(), success)); // the client exits here @@ -1425,14 +1731,15 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry) } } - AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^* \n", + Log("SHUTTLE", Form("\t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^*", GetCurrentRun())); //check if shuttle is done for this run, if so update logbook TObjArray checkEntryArray; checkEntryArray.SetOwner(1); TString whereClause = Form("where run=%d", GetCurrentRun()); - if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || checkEntryArray.GetEntries() == 0) { + if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || + checkEntryArray.GetEntries() == 0) { Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!", GetCurrentRun())); return hasError == kFALSE; @@ -1462,10 +1769,6 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry) } } - // remove ML instance - delete fMonaLisa; - fMonaLisa = 0; - fLogbookEntry = 0; return hasError == kFALSE; @@ -1478,15 +1781,17 @@ Bool_t AliShuttle::ProcessCurrentDetector() // Makes data retrieval just for a specific detector (fCurrentDetector). // Threre should be a configuration for this detector. 
- AliInfo(Form("Retrieving values for %s, run %d", fCurrentDetector.Data(), GetCurrentRun())); + Log("SHUTTLE", Form("ProcessCurrentDetector - Retrieving values for %s, run %d", + fCurrentDetector.Data(), GetCurrentRun())); + TString wd = gSystem->WorkingDirectory(); + if (!CleanReferenceStorage(fCurrentDetector.Data())) return kFALSE; - - TMap dcsMap; - dcsMap.SetOwner(1); - - Bool_t aDCSError = kFALSE; + + gSystem->ChangeDirectory(wd.Data()); + + TMap* dcsMap = new TMap(); // call preprocessor AliPreprocessor* aPreprocessor = @@ -1498,92 +1803,126 @@ Bool_t AliShuttle::ProcessCurrentDetector() if (!processDCS) { - Log(fCurrentDetector, "The preprocessor requested to skip the retrieval of DCS values"); + Log(fCurrentDetector, "ProcessCurrentDetector -" + " The preprocessor requested to skip the retrieval of DCS values"); } else if (fTestMode & kSkipDCS) { - Log(fCurrentDetector, "In TESTMODE - Skipping DCS processing!"); + Log(fCurrentDetector, "ProcessCurrentDetector - In TESTMODE: Skipping DCS processing"); } else if (fTestMode & kErrorDCS) { - Log(fCurrentDetector, "In TESTMODE - Simulating DCS error"); + Log(fCurrentDetector, "ProcessCurrentDetector - In TESTMODE: Simulating DCS error"); UpdateShuttleStatus(AliShuttleStatus::kDCSStarted); UpdateShuttleStatus(AliShuttleStatus::kDCSError); + delete dcsMap; return kFALSE; } else { UpdateShuttleStatus(AliShuttleStatus::kDCSStarted); - TString host(fConfig->GetDCSHost(fCurrentDetector)); - Int_t port = fConfig->GetDCSPort(fCurrentDetector); - - // Retrieval of Aliases - TObjString* anAlias = 0; - Int_t iAlias = 0; - Int_t nTotAliases= ((TMap*)fConfig->GetDCSAliases(fCurrentDetector))->GetEntries(); - TIter iterAliases(fConfig->GetDCSAliases(fCurrentDetector)); - while ((anAlias = (TObjString*) iterAliases.Next())) + // Query DCS archive + Int_t nServers = fConfig->GetNServers(fCurrentDetector); + + for (int iServ=0; iServSetOwner(1); - - iAlias++; - aDCSError = (GetValueSet(host, port, anAlias->String(), valueSet, 
kAlias) == 0); + + TString host(fConfig->GetDCSHost(fCurrentDetector, iServ)); + Int_t port = fConfig->GetDCSPort(fCurrentDetector, iServ); + Int_t multiSplit = fConfig->GetMultiSplit(fCurrentDetector, iServ); - if(!aDCSError) + Log(fCurrentDetector, Form("ProcessCurrentDetector -" + " Querying DCS Amanda server %s:%d (%d of %d)", + host.Data(), port, iServ+1, nServers)); + + TMap* aliasMap = 0; + TMap* dpMap = 0; + + if (fConfig->GetDCSAliases(fCurrentDetector, iServ)->GetEntries() > 0) { - if (((iAlias-1) % 500) == 0 || iAlias == nTotAliases) - AliInfo(Form("Alias %s (%d of %d) - %d values collected", - anAlias->GetName(), iAlias, nTotAliases, valueSet->GetEntriesFast())); - dcsMap.Add(anAlias->Clone(), valueSet); - } else { - Log(fCurrentDetector, - Form("ProcessCurrentDetector - Error while retrieving alias %s", - anAlias->GetName())); - UpdateShuttleStatus(AliShuttleStatus::kDCSError); - dcsMap.DeleteAll(); - return kFALSE; - } - } + aliasMap = GetValueSet(host, port, + fConfig->GetDCSAliases(fCurrentDetector, iServ), + kAlias, multiSplit); + if (!aliasMap) + { + Log(fCurrentDetector, + Form("ProcessCurrentDetector -" + " Error retrieving DCS aliases from server %s." 
+ " Sending mail to DCS experts!", host.Data())); + UpdateShuttleStatus(AliShuttleStatus::kDCSError); + + //if (!SendMailToDCS()) + // Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!")); - // Retrieval of Data Points - TObjString* aDP = 0; - Int_t iDP = 0; - Int_t nTotDPs= ((TMap*)fConfig->GetDCSDataPoints(fCurrentDetector))->GetEntries(); - TIter iterDP(fConfig->GetDCSDataPoints(fCurrentDetector)); - while ((aDP = (TObjString*) iterDP.Next())) - { - TObjArray *valueSet = new TObjArray(); - valueSet->SetOwner(1); - if (((iDP-1) % 500) == 0 || iDP == nTotDPs) - AliInfo(Form("Querying DCS archive: DP %s (%d of %d)", - aDP->GetName(), iDP++, nTotDPs)); - aDCSError = (GetValueSet(host, port, aDP->String(), valueSet, kDP) == 0); - - if(!aDCSError) + delete dcsMap; + return kFALSE; + } + } + + if (fConfig->GetDCSDataPoints(fCurrentDetector, iServ)->GetEntries() > 0) { - dcsMap.Add(aDP->Clone(), valueSet); - } else { - Log(fCurrentDetector, - Form("ProcessCurrentDetector - Error while retrieving data point %s", - aDP->GetName())); - UpdateShuttleStatus(AliShuttleStatus::kDCSError); - dcsMap.DeleteAll(); - return kFALSE; + dpMap = GetValueSet(host, port, + fConfig->GetDCSDataPoints(fCurrentDetector, iServ), + kDP, multiSplit); + if (!dpMap) + { + Log(fCurrentDetector, + Form("ProcessCurrentDetector -" + " Error retrieving DCS data points from server %s." 
+ " Sending mail to DCS experts!", host.Data())); + UpdateShuttleStatus(AliShuttleStatus::kDCSError); + + //if (!SendMailToDCS()) + // Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!")); + + if (aliasMap) delete aliasMap; + delete dcsMap; + return kFALSE; + } + } + + // merge aliasMap and dpMap into dcsMap + if(aliasMap) { + TIter iter(aliasMap); + TObjString* key = 0; + while ((key = (TObjString*) iter.Next())) + dcsMap->Add(key, aliasMap->GetValue(key->String())); + + aliasMap->SetOwner(kFALSE); + delete aliasMap; + } + + if(dpMap) { + TIter iter(dpMap); + TObjString* key = 0; + while ((key = (TObjString*) iter.Next())) + dcsMap->Add(key, dpMap->GetValue(key->String())); + + dpMap->SetOwner(kFALSE); + delete dpMap; } } } - + + // save map into file, to help debugging in case of preprocessor error + TFile* f = TFile::Open("DCSMap.root","recreate"); + f->cd(); + dcsMap->Write("DCSMap", TObject::kSingleKey); + f->Close(); + delete f; + // DCS Archive DB processing successful. Call Preprocessor! UpdateShuttleStatus(AliShuttleStatus::kPPStarted); - UInt_t returnValue = aPreprocessor->Process(&dcsMap); + UInt_t returnValue = aPreprocessor->Process(dcsMap); if (returnValue > 0) // Preprocessor error! { - Log(fCurrentDetector, Form("Preprocessor failed. Process returned %d.", returnValue)); + Log(fCurrentDetector, Form("ProcessCurrentDetector - " + "Preprocessor failed. 
Process returned %d.", returnValue)); UpdateShuttleStatus(AliShuttleStatus::kPPError); - dcsMap.DeleteAll(); + dcsMap->DeleteAll(); + delete dcsMap; return kFALSE; } @@ -1592,7 +1931,8 @@ Bool_t AliShuttle::ProcessCurrentDetector() Log(fCurrentDetector, Form("ProcessCurrentDetector - %s preprocessor returned success", fCurrentDetector.Data())); - dcsMap.DeleteAll(); + dcsMap->DeleteAll(); + delete dcsMap; return kTRUE; } @@ -1622,15 +1962,15 @@ Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause, AliDebug(2,Form("Query = %s", sqlQuery.Data())); if(aResult->GetRowCount() == 0) { - AliInfo("No entries in Shuttle Logbook match request"); + Log("SHUTTLE", "No entries in Shuttle Logbook match request"); delete aResult; return kTRUE; } // TODO Check field count! - const UInt_t nCols = 22; + const UInt_t nCols = 23; if (aResult->GetFieldCount() != (Int_t) nCols) { - AliError("Invalid SQL result field number!"); + Log("SHUTTLE", "Invalid SQL result field number!"); delete aResult; return kFALSE; } @@ -1672,7 +2012,7 @@ AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run) TSQLResult* aResult = fServer[3]->Query(sqlQuery); if (!aResult) { - AliError(Form("Can't execute query <%s>!", sqlQuery.Data())); + Log("SHUTTLE", Form("Can't execute query <%s>!", sqlQuery.Data())); return 0; } @@ -1683,7 +2023,8 @@ AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run) } if (aResult->GetRowCount() > 1) { - AliError(Form("More than one entry in DAQ Logbook for run %d. Skipping", run)); + Log("SHUTTLE", Form("QueryRunParameters - UNEXPECTED: " + "more than one entry in DAQ Logbook for run %d!", run)); delete aResult; return 0; } @@ -1691,7 +2032,7 @@ AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run) TSQLRow* aRow = aResult->Next(); if (!aRow) { - AliError(Form("Could not retrieve row for run %d. Skipping", run)); + Log("SHUTTLE", Form("QueryRunParameters - Could not retrieve row for run %d. 
Skipping", run)); delete aResult; return 0; } @@ -1704,10 +2045,106 @@ AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run) UInt_t startTime = entry->GetStartTime(); UInt_t endTime = entry->GetEndTime(); - if (!startTime || !endTime || startTime > endTime) { +// if (!startTime || !endTime || startTime > endTime) +// { +// Log("SHUTTLE", +// Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d. Skipping!", +// run, startTime, endTime)); +// +// Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run)); +// fLogbookEntry = entry; +// if (!UpdateShuttleLogbook("shuttle_done")) +// { +// AliError(Form("Could not update logbook for run %d !", run)); +// } +// fLogbookEntry = 0; +// +// delete entry; +// delete aRow; +// delete aResult; +// return 0; +// } + + if (!startTime) + { Log("SHUTTLE", - Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d", - run, startTime, endTime)); + Form("QueryRunParameters - Invalid parameters for Run %d: " + "startTime = %d, endTime = %d. Skipping!", + run, startTime, endTime)); + + Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run)); + fLogbookEntry = entry; + if (!UpdateShuttleLogbook("shuttle_ignored")) + { + AliError(Form("Could not update logbook for run %d !", run)); + } + fLogbookEntry = 0; + + delete entry; + delete aRow; + delete aResult; + return 0; + } + + if (startTime && !endTime) + { + // TODO Here we don't mark SHUTTLE done, because this may mean + //the run is still ongoing!! + Log("SHUTTLE", + Form("QueryRunParameters - Invalid parameters for Run %d: " + "startTime = %d, endTime = %d. 
Skipping (Shuttle won't be marked as DONE)!", + run, startTime, endTime)); + + //Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run)); + //fLogbookEntry = entry; + //if (!UpdateShuttleLogbook("shuttle_done")) + //{ + // AliError(Form("Could not update logbook for run %d !", run)); + //} + //fLogbookEntry = 0; + + delete entry; + delete aRow; + delete aResult; + return 0; + } + + if (startTime && endTime && (startTime > endTime)) + { + Log("SHUTTLE", + Form("QueryRunParameters - Invalid parameters for Run %d: " + "startTime = %d, endTime = %d. Skipping!", + run, startTime, endTime)); + + Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run)); + fLogbookEntry = entry; + if (!UpdateShuttleLogbook("shuttle_ignored")) + { + AliError(Form("Could not update logbook for run %d !", run)); + } + fLogbookEntry = 0; + + delete entry; + delete aRow; + delete aResult; + return 0; + } + + TString totEventsStr = entry->GetRunParameter("totalEvents"); + Int_t totEvents = totEventsStr.Atoi(); + if (totEvents < 1) + { + Log("SHUTTLE", + Form("QueryRunParameters - Run %d has 0 events - Skipping!", run)); + + Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run)); + fLogbookEntry = entry; + if (!UpdateShuttleLogbook("shuttle_ignored")) + { + AliError(Form("Could not update logbook for run %d !", run)); + } + fLogbookEntry = 0; + delete entry; delete aRow; delete aResult; @@ -1721,49 +2158,41 @@ AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run) } //______________________________________________________________________________________________ -Bool_t AliShuttle::GetValueSet(const char* host, Int_t port, const char* entry, - TObjArray* valueSet, DCSType type) +TMap* AliShuttle::GetValueSet(const char* host, Int_t port, const TSeqCollection* entries, + DCSType type, Int_t multiSplit) { // Retrieve all "entry" data points from the DCS server // host, port: TSocket connection parameters - // entry: name of the alias or data point - // valueSet: array of 
retrieved AliDCSValue's + // entries: list of name of the alias or data point // type: kAlias or kDP + // returns TMap of values, 0 when failure + + AliDCSClient client(host, port, fTimeout, fRetries, multiSplit); - AliDCSClient client(host, port, fTimeout, fRetries); - if (!client.IsConnected()) - { - return kFALSE; - } - - Int_t result=0; - + TMap* result = 0; if (type == kAlias) { - result = client.GetAliasValues(entry, - GetCurrentStartTime(), GetCurrentEndTime(), valueSet); - } else - if (type == kDP) + result = client.GetAliasValues(entries, GetCurrentStartTime(), + GetCurrentEndTime()); + } + else if (type == kDP) { - result = client.GetDPValues(entry, - GetCurrentStartTime(), GetCurrentEndTime(), valueSet); + result = client.GetDPValues(entries, GetCurrentStartTime(), + GetCurrentEndTime()); } - if (result < 0) + if (result == 0) { - Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get '%s'! Reason: %s", - entry, AliDCSClient::GetErrorString(result))); - - if (result == AliDCSClient::fgkServerError) - { - Log(fCurrentDetector.Data(), Form("GetValueSet - Server error: %s", + Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get entries! 
Reason: %s", + client.GetErrorString(client.GetResultErrorCode()))); + if (client.GetResultErrorCode() == AliDCSClient::fgkServerError) + Log(fCurrentDetector.Data(), Form("GetValueSet - Server error code: %s", client.GetServerError().Data())); - } - return kFALSE; + return 0; } - - return kTRUE; + + return result; } //______________________________________________________________________________________________ @@ -1872,8 +2301,10 @@ const char* AliShuttle::GetFile(Int_t system, const char* detector, filePath.Data(), fileSize.Data(), fileChecksum.Data())); // retrieved file is renamed to make it unique - TString localFileName = Form("%s_%s_%d_%s_%s.shuttle", - GetSystemName(system), detector, GetCurrentRun(), id, sourceName.Data()); + TString localFileName = Form("%s/%s_%d_process/%s_%s_%d_%s_%s.shuttle", + GetShuttleTempDir(), detector, GetCurrentRun(), + GetSystemName(system), detector, GetCurrentRun(), + id, sourceName.Data()); // file retrieval from FXS @@ -1890,17 +2321,13 @@ const char* AliShuttle::GetFile(Int_t system, const char* detector, Log(detector, Form("GetFileName - Copy of file %s from %s FXS failed", filePath.Data(), GetSystemName(system))); continue; - } else { - AliInfo(Form("File %s copied from %s FXS into %s/%s", - filePath.Data(), GetSystemName(system), - GetShuttleTempDir(), localFileName.Data())); - } + } if (fileChecksum.Length()>0) { // compare md5sum of local file with the one stored in the FXS DB - Int_t md5Comp = gSystem->Exec(Form("md5sum %s/%s |grep %s 2>&1 > /dev/null", - GetShuttleTempDir(), localFileName.Data(), fileChecksum.Data())); + Int_t md5Comp = gSystem->Exec(Form("md5sum %s |grep %s 2>&1 > /dev/null", + localFileName.Data(), fileChecksum.Data())); if (md5Comp != 0) { @@ -1922,13 +2349,14 @@ const char* AliShuttle::GetFile(Int_t system, const char* detector, TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, sourceName.Data())); fFXSlist[system].Add(fileParams); - static TString fullLocalFileName; - 
fullLocalFileName = TString::Format("%s/%s", GetShuttleTempDir(), localFileName.Data()); - - AliInfo(Form("fullLocalFileName = %s", fullLocalFileName.Data())); - - return fullLocalFileName.Data(); - + static TString staticLocalFileName; + staticLocalFileName.Form("%s", localFileName.Data()); + + Log(fCurrentDetector, Form("GetFile - Retrieved file with id %s and " + "source %s from %s to %s", id, source, + GetSystemName(system), localFileName.Data())); + + return staticLocalFileName.Data(); } //______________________________________________________________________________________________ @@ -1939,18 +2367,21 @@ Bool_t AliShuttle::RetrieveFile(UInt_t system, const char* fxsFileName, const ch // // check temp directory: trying to cd to temp; if it does not exist, create it - AliDebug(2, Form("Copy file %s from %s FXS into %s/%s", - GetSystemName(system), fxsFileName, GetShuttleTempDir(), localFileName)); + AliDebug(2, Form("Copy file %s from %s FXS into %s", + GetSystemName(system), fxsFileName, localFileName)); + + TString tmpDir(localFileName); + + tmpDir = tmpDir(0,tmpDir.Last('/')); - void* dir = gSystem->OpenDirectory(GetShuttleTempDir()); - if (dir == NULL) { - if (gSystem->mkdir(GetShuttleTempDir(), kTRUE)) { - AliError(Form("Can't open directory <%s>", GetShuttleTempDir())); + Int_t noDir = gSystem->GetPathInfo(tmpDir.Data(), 0, (Long64_t*) 0, 0, 0); + if (noDir) // temp dir does not exists! 
+ { + if (gSystem->mkdir(tmpDir.Data(), 1)) + { + Log(fCurrentDetector.Data(), "RetrieveFile - could not make temp directory!!"); return kFALSE; } - - } else { - gSystem->FreeDirectory(dir); } TString baseFXSFolder; @@ -1964,17 +2395,16 @@ Bool_t AliShuttle::RetrieveFile(UInt_t system, const char* fxsFileName, const ch } else if (system == kHLT) { - baseFXSFolder = "~/"; + baseFXSFolder = "/opt/FXS/"; } - TString command = Form("scp -oPort=%d -2 %s@%s:%s%s %s/%s", + TString command = Form("scp -oPort=%d -2 %s@%s:%s%s %s", fConfig->GetFXSPort(system), fConfig->GetFXSUser(system), fConfig->GetFXSHost(system), baseFXSFolder.Data(), fxsFileName, - GetShuttleTempDir(), localFileName); AliDebug(2, Form("%s",command.Data())); @@ -1991,6 +2421,8 @@ TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char // Get sources producing the condition file Id from file exchange servers // if id is NULL all sources are returned (distinct) // + + Log(detector, Form("GetFileSources - Retrieving sources with id %s from %s", id, GetSystemName(system))); // check if test mode should simulate a FXS error if (fTestMode & kErrorFXSSources) @@ -1999,11 +2431,13 @@ TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char return 0; } - if (system == kDCS) { - AliError("DCS system has only one source of data!"); - return NULL; + Log(detector, "GetFileSources - WARNING: DCS system has only one source of data!"); + TList *list = new TList(); + list->SetOwner(1); + list->Add(new TObjString(" ")); + return list; } // check connection, in case connect @@ -2051,8 +2485,9 @@ TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char return list; } - TSQLRow* aRow; + Log(detector, Form("GetFileSources - Found %d sources", aResult->GetRowCount())); + TSQLRow* aRow; while ((aRow = aResult->Next())) { @@ -2074,6 +2509,8 @@ TList* AliShuttle::GetFileIDs(Int_t system, const char* detector, const char* so // Get all ids of condition files 
produced by a given source from file exchange servers // + Log(detector, Form("GetFileIDs - Retrieving ids with source %s with %s", source, GetSystemName(system))); + // check if test mode should simulate a FXS error if (fTestMode & kErrorFXSSources) { @@ -2126,6 +2563,8 @@ TList* AliShuttle::GetFileIDs(Int_t system, const char* detector, const char* so return list; } + Log(detector, Form("GetFileIDs - Found %d ids", aResult->GetRowCount())); + TSQLRow* aRow; while ((aRow = aResult->Next())) @@ -2349,17 +2788,22 @@ Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status TString detName(detector); TString setClause; - if(detName == "shuttle_done") + if (detName == "shuttle_done" || detName == "shuttle_ignored") { setClause = "set shuttle_done=1"; - // Send the information to ML - TMonaLisaText mlStatus("SHUTTLE_status", "Done"); - - TList mlList; - mlList.Add(&mlStatus); + if (detName == "shuttle_done") + { + // Send the information to ML + TMonaLisaText mlStatus("SHUTTLE_status", "Done"); - fMonaLisa->SendParameters(&mlList); + TList mlList; + mlList.Add(&mlStatus); + + TString mlID; + mlID.Form("%d", GetCurrentRun()); + fMonaLisa->SendParameters(&mlList, mlID); + } } else { TString statusStr(status); if(statusStr.Contains("done", TString::kIgnoreCase) || @@ -2422,6 +2866,34 @@ UInt_t AliShuttle::GetCurrentEndTime() const return fLogbookEntry ? 
fLogbookEntry->GetEndTime() : 0; } +//______________________________________________________________________________________________ +UInt_t AliShuttle::GetCurrentYear() const +{ + // + // Get current year from logbook entry + // + + if (!fLogbookEntry) return 0; + + TTimeStamp startTime(GetCurrentStartTime()); + TString year = Form("%d",startTime.GetDate()); + year = year(0,4); + + return year.Atoi(); +} + +//______________________________________________________________________________________________ +const char* AliShuttle::GetLHCPeriod() const +{ + // + // Get current LHC period from logbook entry + // + + if (!fLogbookEntry) return 0; + + return fLogbookEntry->GetRunParameter("LHCperiod"); +} + //______________________________________________________________________________________________ void AliShuttle::Log(const char* detector, const char* message) { @@ -2429,9 +2901,13 @@ void AliShuttle::Log(const char* detector, const char* message) // Fill log string with a message // - void* dir = gSystem->OpenDirectory(GetShuttleLogDir()); + TString logRunDir = GetShuttleLogDir(); + if (GetCurrentRun() >=0) + logRunDir += Form("/%d", GetCurrentRun()); + + void* dir = gSystem->OpenDirectory(logRunDir.Data()); if (dir == NULL) { - if (gSystem->mkdir(GetShuttleLogDir(), kTRUE)) { + if (gSystem->mkdir(logRunDir.Data(), kTRUE)) { AliError(Form("Can't open directory <%s>", GetShuttleLogDir())); return; } @@ -2478,13 +2954,29 @@ TString AliShuttle::GetLogFileName(const char* detector) const TString fileName; if (GetCurrentRun() >= 0) - fileName.Form("%s/%s_%d.log", GetShuttleLogDir(), detector, GetCurrentRun()); - else + { + fileName.Form("%s/%d/%s_%d.log", GetShuttleLogDir(), GetCurrentRun(), + detector, GetCurrentRun()); + } else { fileName.Form("%s/%s.log", GetShuttleLogDir(), detector); + } return fileName; } +//______________________________________________________________________________________________ +void AliShuttle::SendAlive() +{ + // sends alive message to ML 
+ + TMonaLisaText mlStatus("SHUTTLE_status", "Alive"); + + TList mlList; + mlList.Add(&mlStatus); + + fMonaLisa->SendParameters(&mlList, "__PROCESSINGINFO__"); +} + //______________________________________________________________________________________________ Bool_t AliShuttle::Collect(Int_t run) { @@ -2502,6 +2994,13 @@ Bool_t AliShuttle::Collect(Int_t run) SetLastAction("Starting"); + // create ML instance + if (!fMonaLisa) + fMonaLisa = new TMonaLisaWriter(fConfig->GetMonitorHost(), fConfig->GetMonitorTable()); + + + SendAlive(); + TString whereClause("where shuttle_done=0"); if (run != -1) whereClause += Form(" and run=%d", run); @@ -2584,8 +3083,8 @@ Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries) } // clean SHUTTLE temp directory - TString filename = Form("%s/*.shuttle", GetShuttleTempDir()); - RemoveFile(filename.Data()); + //TString filename = Form("%s/*.shuttle", GetShuttleTempDir()); + //RemoveFile(filename.Data()); } return hasError == kFALSE; @@ -2695,7 +3194,7 @@ Bool_t AliShuttle::SendMail() { if (gSystem->mkdir(GetShuttleLogDir(), kTRUE)) { - AliError(Form("Can't open directory <%s>", GetShuttleLogDir())); + Log("SHUTTLE", Form("SendMail - Can't open directory <%s>", GetShuttleLogDir())); return kFALSE; } @@ -2712,7 +3211,7 @@ Bool_t AliShuttle::SendMail() if (!mailBody.is_open()) { - AliError(Form("Could not open mail body file %s", bodyFileName.Data())); + Log("SHUTTLE", Form("Could not open mail body file %s", bodyFileName.Data())); return kFALSE; } @@ -2727,25 +3226,152 @@ Bool_t AliShuttle::SendMail() AliDebug(2, Form("to: %s",to.Data())); if (to.IsNull()) { - AliInfo("List of detector responsibles not yet set!"); + Log("SHUTTLE", "List of detector responsibles not yet set!"); return kFALSE; } TString cc="alberto.colla@cern.ch"; - TString subject = Form("%s Shuttle preprocessor FAILED in run %d !", - fCurrentDetector.Data(), GetCurrentRun()); + TString subject = Form("%s Shuttle preprocessor FAILED in run %d (run type = 
%s)!", + fCurrentDetector.Data(), GetCurrentRun(), GetRunType()); AliDebug(2, Form("subject: %s", subject.Data())); TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data()); body += Form("SHUTTLE just detected that your preprocessor " - "failed processing run %d!!\n\n", GetCurrentRun()); - body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n", fCurrentDetector.Data()); - body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n"); + "failed processing run %d (run type = %s)!!\n\n", + GetCurrentRun(), GetRunType()); + body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n", + fCurrentDetector.Data()); + if (fConfig->GetRunMode() == AliShuttleConfig::kTest) + { + body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n"); + } else { + body += Form("\thttp://pcalimonitor.cern.ch/shuttle.jsp?instance=PROD?time=168 \n\n"); + } + + + TString logFolder = "logs"; + if (fConfig->GetRunMode() == AliShuttleConfig::kProd) + logFolder += "_PROD"; + + + body += Form("Find the %s log for the current run on \n\n" + "\thttp://pcalishuttle01.cern.ch:8880/%s/%d/%s_%d.log \n\n", + fCurrentDetector.Data(), logFolder.Data(), GetCurrentRun(), + fCurrentDetector.Data(), GetCurrentRun()); + body += Form("The last 10 lines of %s log file are following:\n\n", fCurrentDetector.Data()); + + AliDebug(2, Form("Body begin: %s", body.Data())); + + mailBody << body.Data(); + mailBody.close(); + mailBody.open(bodyFileName, ofstream::out | ofstream::app); + + TString logFileName = Form("%s/%d/%s_%d.log", GetShuttleLogDir(), + GetCurrentRun(), fCurrentDetector.Data(), GetCurrentRun()); + TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data()); + if (gSystem->Exec(tailCommand.Data())) + { + mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data()); + } + + TString endBody = Form("------------------------------------------------------\n\n"); + endBody += Form("In 
case of problems please contact the SHUTTLE core team.\n\n"); + endBody += "Please do not answer this message directly, it is automatically generated.\n\n"; + endBody += "Greetings,\n\n \t\t\tthe SHUTTLE\n"; + + AliDebug(2, Form("Body end: %s", endBody.Data())); + + mailBody << endBody.Data(); + + mailBody.close(); + + // send mail! + TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s", + subject.Data(), + cc.Data(), + to.Data(), + bodyFileName.Data()); + AliDebug(2, Form("mail command: %s", mailCommand.Data())); + + Bool_t result = gSystem->Exec(mailCommand.Data()); + + return result == 0; +} + +//______________________________________________________________________________________________ +Bool_t AliShuttle::SendMailToDCS() +{ + // + // sends a mail to the DCS experts in case of DCS error + // + + if (fTestMode != kNone) + return kTRUE; + + void* dir = gSystem->OpenDirectory(GetShuttleLogDir()); + if (dir == NULL) + { + if (gSystem->mkdir(GetShuttleLogDir(), kTRUE)) + { + Log("SHUTTLE", Form("SendMailToDCS - Can't open directory <%s>", GetShuttleLogDir())); + return kFALSE; + } + + } else { + gSystem->FreeDirectory(dir); + } + + TString bodyFileName; + bodyFileName.Form("%s/mail.body", GetShuttleLogDir()); + gSystem->ExpandPathName(bodyFileName); + + ofstream mailBody; + mailBody.open(bodyFileName, ofstream::out); + + if (!mailBody.is_open()) + { + Log("SHUTTLE", Form("SendMailToDCS - Could not open mail body file %s", bodyFileName.Data())); + return kFALSE; + } + + TString to="Vladimir.Fekete@cern.ch, Svetozar.Kapusta@cern.ch"; + //TString to="alberto.colla@cern.ch"; + AliDebug(2, Form("to: %s",to.Data())); + + if (to.IsNull()) { + Log("SHUTTLE", "List of detector responsibles not yet set!"); + return kFALSE; + } + + TString cc="alberto.colla@cern.ch"; + + TString subject = Form("Retrieval of data points for %s FAILED in run %d !", + fCurrentDetector.Data(), GetCurrentRun()); + AliDebug(2, Form("subject: %s", subject.Data())); + + TString body = 
Form("Dear DCS experts, \n\n"); + body += Form("SHUTTLE couldn\'t retrieve the data points for detector %s " + "in run %d!!\n\n", fCurrentDetector.Data(), GetCurrentRun()); + body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n", + fCurrentDetector.Data()); + if (fConfig->GetRunMode() == AliShuttleConfig::kTest) + { + body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n"); + } else { + body += Form("\thttp://pcalimonitor.cern.ch/shuttle.jsp?instance=PROD?time=168 \n\n"); + } + + TString logFolder = "logs"; + if (fConfig->GetRunMode() == AliShuttleConfig::kProd) + logFolder += "_PROD"; + + body += Form("Find the %s log for the current run on \n\n" - "\thttp://pcalishuttle01.cern.ch:8880/logs/%s_%d.log \n\n", - fCurrentDetector.Data(), fCurrentDetector.Data(), GetCurrentRun()); - body += Form("The last 10 lines of %s log file are following:\n\n"); + "\thttp://pcalishuttle01.cern.ch:8880/%s/%d/%s_%d.log \n\n", + fCurrentDetector.Data(), logFolder.Data(), GetCurrentRun(), + fCurrentDetector.Data(), GetCurrentRun()); + body += Form("The last 10 lines of %s log file are following:\n\n", fCurrentDetector.Data()); AliDebug(2, Form("Body begin: %s", body.Data())); @@ -2753,7 +3379,8 @@ Bool_t AliShuttle::SendMail() mailBody.close(); mailBody.open(bodyFileName, ofstream::out | ofstream::app); - TString logFileName = Form("%s/%s_%d.log", GetShuttleLogDir(), fCurrentDetector.Data(), GetCurrentRun()); + TString logFileName = Form("%s/%d/%s_%d.log", GetShuttleLogDir(), GetCurrentRun(), + fCurrentDetector.Data(), GetCurrentRun()); TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data()); if (gSystem->Exec(tailCommand.Data())) { @@ -2799,6 +3426,24 @@ const char* AliShuttle::GetRunType() return fLogbookEntry->GetRunType(); } +//______________________________________________________________________________________________ +Bool_t AliShuttle::GetHLTStatus() +{ + // Return HLT status (ON=1 OFF=0) + // 
Converts the HLT status from the status string read in the run logbook (not just a bool) + + if(!fLogbookEntry) { + AliError("No logbook entry!"); + return 0; + } + + // TODO implement when HLTStatus is inserted in run logbook + //TString hltStatus = fLogbookEntry->GetRunParameter("HLTStatus"); + //if(hltStatus == "OFF") {return kFALSE}; + + return kTRUE; +} + //______________________________________________________________________________________________ void AliShuttle::SetShuttleTempDir(const char* tmpDir) {