1 /**************************************************************************
2 * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
4 * Author: The ALICE Off-line Project. *
5 * Contributors are mentioned in the code where appropriate. *
7 * Permission to use, copy, modify and distribute this software and its *
8 * documentation strictly for non-commercial purposes is hereby granted *
9 * without fee, provided that the above copyright notice appears in all *
10 * copies and that both the copyright notice and this permission notice *
11 * appear in the supporting documentation. The authors make no claims *
12 * about the suitability of this software for any purpose. It is *
13 * provided "as is" without express or implied warranty. *
14 **************************************************************************/
18 Revision 1.26 2007/01/23 19:20:03 acolla
19 Removed old ldif files, added TOF, MCH ldif files. Added some options in
20 AliShuttleConfig::Print. Added in Ali Shuttle: SetShuttleTempDir and
23 Revision 1.25 2007/01/15 19:13:52 acolla
24 Moved some AliInfo to AliDebug in SendMail function
26 Revision 1.21 2006/12/07 08:51:26 jgrosseo
28 table, db names in ldap configuration
29 added GRP preprocessor
30 DCS data can also be retrieved by data point
32 Revision 1.20 2006/11/16 16:16:48 jgrosseo
33 introducing strict run ordering flag
34 removed giving preprocessor name to preprocessor, they have to know their name themselves ;-)
36 Revision 1.19 2006/11/06 14:23:04 jgrosseo
37 major update (Alberto)
38 o) reading of run parameters from the logbook
39 o) online offline naming conversion
40 o) standalone DCSclient package
42 Revision 1.18 2006/10/20 15:22:59 jgrosseo
43 o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
44 o) Merging Collect, CollectAll, CollectNew function
45 o) Removing implementation of empty copy constructors (declaration still there!)
47 Revision 1.17 2006/10/05 16:20:55 jgrosseo
48 adapting to new CDB classes
50 Revision 1.16 2006/10/05 15:46:26 jgrosseo
51 applying to the new interface
53 Revision 1.15 2006/10/02 16:38:39 jgrosseo
56 storing of objects that failed to be stored to the grid before
57 interfacing of shuttle status table in daq system
59 Revision 1.14 2006/08/29 09:16:05 jgrosseo
62 Revision 1.13 2006/08/15 10:50:00 jgrosseo
63 effc++ corrections (alberto)
65 Revision 1.12 2006/08/08 14:19:29 jgrosseo
66 Update to shuttle classes (Alberto)
68 - Possibility to set the full object's path in the Preprocessor's and
69 Shuttle's Store functions
70 - Possibility to extend the object's run validity in the same classes
71 ("startValidity" and "validityInfinite" parameters)
72 - Implementation of the StoreReferenceData function to store reference
73 data in a dedicated CDB storage.
75 Revision 1.11 2006/07/21 07:37:20 jgrosseo
76 last run is stored after each run
78 Revision 1.10 2006/07/20 09:54:40 jgrosseo
79 introducing status management: The processing per subdetector is divided into several steps,
80 after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
81 can keep track of the number of failures and skips further processing after a certain threshold is
82 exceeded. These thresholds can be configured in LDAP.
84 Revision 1.9 2006/07/19 10:09:55 jgrosseo
85 new configuration, accesst to DAQ FES (Alberto)
87 Revision 1.8 2006/07/11 12:44:36 jgrosseo
88 adding parameters for extended validity range of data produced by preprocessor
90 Revision 1.7 2006/07/10 14:37:09 jgrosseo
91 small fix + todo comment
93 Revision 1.6 2006/07/10 13:01:41 jgrosseo
94 enhanced storing of last sucessfully processed run (alberto)
96 Revision 1.5 2006/07/04 14:59:57 jgrosseo
97 revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
99 Revision 1.4 2006/06/12 09:11:16 jgrosseo
100 coding conventions (Alberto)
102 Revision 1.3 2006/06/06 14:26:40 jgrosseo
103 o) removed files that were moved to STEER
104 o) shuttle updated to follow the new interface (Alberto)
106 Revision 1.2 2006/03/07 07:52:34 hristov
107 New version (B.Yordanov)
109 Revision 1.6 2005/11/19 17:19:14 byordano
110 RetrieveDATEEntries and RetrieveConditionsData added
112 Revision 1.5 2005/11/19 11:09:27 byordano
113 AliShuttle declaration added
115 Revision 1.4 2005/11/17 17:47:34 byordano
116 TList changed to TObjArray
118 Revision 1.3 2005/11/17 14:43:23 byordano
121 Revision 1.1.1.1 2005/10/28 07:33:58 hristov
122 Initial import as subdirectory in AliRoot
124 Revision 1.2 2005/09/13 08:41:15 byordano
125 default startTime endTime added
127 Revision 1.4 2005/08/30 09:13:02 byordano
130 Revision 1.3 2005/08/29 21:15:47 byordano
136 // This class is the main manager for AliShuttle.
137 // It organizes the data retrieval from DCS and calls the
138 // interface methods of AliPreprocessor.
139 // For every detector in the configuration (see AliShuttleConfig),
140 // data for its set of aliases is retrieved. If there is a registered
141 // AliPreprocessor for this detector then it will be used
142 // according to the schema (see AliPreprocessor).
143 // If there isn't a registered AliPreprocessor then the retrieved
144 // data is stored automatically to the underlying AliCDBStorage.
145 // For detSpec is used the alias name.
148 #include "AliShuttle.h"
150 #include "AliCDBManager.h"
151 #include "AliCDBStorage.h"
152 #include "AliCDBId.h"
153 #include "AliCDBRunRange.h"
154 #include "AliCDBPath.h"
155 #include "AliCDBEntry.h"
156 #include "AliShuttleConfig.h"
157 #include "DCSClient/AliDCSClient.h"
159 #include "AliPreprocessor.h"
160 #include "AliShuttleStatus.h"
161 #include "AliShuttleLogbookEntry.h"
166 #include <TTimeStamp.h>
167 #include <TObjString.h>
168 #include <TSQLServer.h>
169 #include <TSQLResult.h>
173 #include <TMonaLisaWriter.h>
177 #include <sys/types.h>
178 #include <sys/wait.h>
// Storage URIs used by the Shuttle: the main (alien://) CDB and reference storages
// and the local:// storages that WriteToCDB falls back to.
182 TString AliShuttle::fgkMainCDB("alien://folder=ShuttleCDB");
183 TString AliShuttle::fgkLocalCDB("local://LocalShuttleCDB");
184 TString AliShuttle::fgkMainRefStorage("alien://folder=ShuttleReference");
185 TString AliShuttle::fgkLocalRefStorage("local://LocalReferenceStorage");
// Flag initialised to kTRUE; presumably switches the DCS archive query on/off - confirm at its use site.
187 Bool_t AliShuttle::fgkProcessDCS(kTRUE);
// Temp and log directories, expanded from $ALICE_ROOT during static initialisation.
// NOTE(review): this depends on gSystem being usable at static-init time - confirm.
189 TString AliShuttle::fgkShuttleTempDir = gSystem->ExpandPathName("$ALICE_ROOT/SHUTTLE/temp");
190 TString AliShuttle::fgkShuttleLogDir = gSystem->ExpandPathName("$ALICE_ROOT/SHUTTLE/log");
192 //______________________________________________________________________________________________
// Constructor. Stores the DCS client timeout/retry settings, aborts through AliFatal
// when the configuration reports itself invalid, and initialises the bookkeeping
// containers and the monitoring mutex.
193 AliShuttle::AliShuttle(const AliShuttleConfig* config,
194 UInt_t timeout, Int_t retries):
196 fTimeout(timeout), fRetries(retries),
208 // config: AliShuttleConfig used
209 // timeout: timeout used for AliDCSClient connection
210 // retries: the number of retries in case of connection error.
213 if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
214 for(int iSys=0;iSys<4;iSys++) {
// mark the FXS list as owner of its entries (ROOT SetOwner)
217 fFXSlist[iSys].SetOwner(kTRUE);
// mark the preprocessor map as owner (ROOT SetOwner)
219 fPreprocessorMap.SetOwner(kTRUE);
// start with every detector's "first unprocessed" flag cleared
221 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
222 fFirstUnprocessed[iDet] = kFALSE;
224 fMonitoringMutex = new TMutex();
227 //______________________________________________________________________________________________
// Destructor: empties fPreprocessorMap with DeleteAll(), closes and deletes the
// four fServer entries and deletes the monitoring mutex.
228 AliShuttle::~AliShuttle()
232 fPreprocessorMap.DeleteAll();
233 for(int iSys=0;iSys<4;iSys++)
// NOTE(review): a null check on fServer[iSys] is presumably made before these calls - confirm
235 fServer[iSys]->Close();
236 delete fServer[iSys];
245 if (fMonitoringMutex)
247 delete fMonitoringMutex;
// reset the pointer after deletion
248 fMonitoringMutex = 0;
252 //______________________________________________________________________________________________
// Registers a preprocessor under the name returned by its GetName().
// A name for which GetDetPos() is negative triggers AliFatal; a name that is already
// present in fPreprocessorMap produces a warning.
253 void AliShuttle::RegisterPreprocessor(AliPreprocessor* preprocessor)
256 // Registers new AliPreprocessor.
257 // It uses GetName() as the identifier of the preprocessor.
258 // The preprocessor is registered if there isn't any other
259 // with the same identifier (GetName()).
262 const char* detName = preprocessor->GetName();
263 if(GetDetPos(detName) < 0)
264 AliFatal(Form("********** !!!!! Invalid detector name: %s !!!!! **********", detName));
266 if (fPreprocessorMap.GetValue(detName)) {
267 AliWarning(Form("AliPreprocessor %s is already registered!", detName));
// the map key is a newly allocated TObjString holding the detector name
271 fPreprocessorMap.Add(new TObjString(detName), preprocessor);
273 //______________________________________________________________________________________________
// Stores an object for offline reconstruction: forwards all arguments to WriteToCDB
// with the main/local CDB URIs (fgkMainCDB, fgkLocalCDB) and returns its result.
274 UInt_t AliShuttle::Store(const AliCDBPath& path, TObject* object,
275 AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite)
277 // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for
278 // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored
279 // using this function. Use StoreReferenceData instead!
280 // It calls the WriteToCDB function, which performs the actual storage
282 return WriteToCDB(fgkMainCDB, fgkLocalCDB, path, object,
283 metaData, validityStart, validityInfinite);
287 //______________________________________________________________________________________________
// Stores reference data: forwards to WriteToCDB with the reference storage URIs
// (fgkMainRefStorage, fgkLocalRefStorage). No validity arguments are passed, so
// WriteToCDB's own defaults for them apply.
288 UInt_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData)
290 // Stores a CDB object in the storage for reference data. These objects will not be available during
291 // offline reconstruction. Use this function for reference data only!
292 // It calls the WriteToCDB function, which performs the actual storage
294 return WriteToCDB(fgkMainRefStorage, fgkLocalRefStorage, path, object, metaData);
298 //______________________________________________________________________________________________
// Writes an object to the storage at mainUri and, when that does not succeed, to the
// storage at localUri. The validity range is built from the current run (see below).
299 UInt_t AliShuttle::WriteToCDB(const char* mainUri, const char* localUri,
300 const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
301 Int_t validityStart, Bool_t validityInfinite)
303 // write object into the CDB. Parameters are passed by Store and StoreReferenceData functions.
304 // The parameters are:
305 // 1) Uri of the main storage (Grid)
306 // 2) Uri of the backup storage (Local)
307 // 3) the object's path.
308 // 4) the object to be stored
309 // 5) the metaData to be associated with the object
310 // 6) the validity start run number w.r.t. the current run,
311 // if the data is valid only for this run leave the default 0
312 // 7) specifies if the calibration data is valid for infinity (this means until updated),
313 // typical for calibration runs, the default is kFALSE
316 // 1 if stored in main (Grid) storage
317 // 2 if stored in backup (Local) storage
// label used in log messages: "CDB" when mainUri equals fgkMainCDB, "Reference" otherwise
319 const char* cdbType = (mainUri == fgkMainCDB) ? "CDB" : "Reference";
// first valid run = current run minus validityStart
321 Int_t firstRun = GetCurrentRun() - validityStart;
323 AliError("First valid run happens to be less than 0! Setting it to 0.");
// last valid run: infinity if requested, otherwise the current run
328 if(validityInfinite) {
329 lastRun = AliCDBRunRange::Infinity();
331 lastRun = GetCurrentRun();
// version and subversion are passed as -1
334 AliCDBId id(path, firstRun, lastRun, -1, -1);
// record the current run in the metadata unless a "RunUsed(TObjString)" property is already set
336 if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){
337 TObjString runUsed = Form("%d", GetCurrentRun());
338 metaData->SetProperty("RunUsed(TObjString)", runUsed.Clone());
// first attempt: main storage
343 if (!(AliCDBManager::Instance()->GetStorage(mainUri))) {
344 AliError(Form("WriteToCDB - Cannot activate main %s storage", cdbType));
346 result = (UInt_t) AliCDBManager::Instance()->GetStorage(mainUri)
347 ->Put(object, id, metaData);
// fallback: backup storage
352 Log(fCurrentDetector,
353 Form("WriteToCDB - Problem with main %s storage. Putting <%s> into backup storage",
354 cdbType, path.GetPath().Data()));
356 // Set Grid version to current run number, to ease retrieval later
357 id.SetVersion(GetCurrentRun());
359 result = AliCDBManager::Instance()->GetStorage(localUri)
360 ->Put(object, id, metaData);
366 Log(fCurrentDetector, "WriteToCDB - Can't store data!");
374 //______________________________________________________________________________________________
// Loads the status entry of the current detector and run from the local CDB into
// fStatusEntry and returns the AliShuttleStatus it holds; returns 0 when no entry is found.
375 AliShuttleStatus* AliShuttle::ReadShuttleStatus()
377 // Reads the AliShuttleStatus from the CDB
// entry path is /SHUTTLE/STATUS/<current detector>, queried for the current run
384 fStatusEntry = AliCDBManager::Instance()->GetStorage(AliShuttle::GetLocalCDB())
385 ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
387 if (!fStatusEntry) return 0;
388 fStatusEntry->SetOwner(1);
// the stored object must be an AliShuttleStatus; the cast yields 0 otherwise
390 AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
392 AliError("Invalid object stored to CDB!");
399 //______________________________________________________________________________________________
// Wraps the given status in a new CDB entry valid for the current run only, keeps it
// in fStatusEntry and puts it into the local CDB.
400 Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status)
402 // writes the status for one subdetector
409 Int_t run = GetCurrentRun();
// path SHUTTLE/STATUS/<current detector>, validity [run, run]
411 AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
413 fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
414 fStatusEntry->SetOwner(1);
416 UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
419 AliError(Form("WriteShuttleStatus for %s, run %d failed", fCurrentDetector.Data(), run));
428 //______________________________________________________________________________________________
// Sets the status object held by fStatusEntry to newStatus, optionally increases its
// retry counter, logs the transition and writes the entry back to the local CDB.
429 void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount)
431 // changes the AliShuttleStatus for the given detector and run to the given status
434 AliError("UNEXPECTED: fStatusEntry empty");
438 AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
441 AliError("UNEXPECTED: status could not be read from current CDB entry");
// the same text goes to the SHUTTLE log and to SetLastAction
445 TString actionStr = Form("UpdateShuttleStatus - %s: Changing state from %s to %s",
446 fCurrentDetector.Data(),
447 status->GetStatusName(),
448 status->GetStatusName(newStatus));
449 Log("SHUTTLE", actionStr);
450 SetLastAction(actionStr);
452 status->SetStatus(newStatus);
453 if (increaseCount) status->IncreaseCount();
// persist the modified entry
455 AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
460 //______________________________________________________________________________________________
// Sends the current detector's status name and retry count through fMonaLisa as the
// parameters "<detector>_status" and "<detector>_count".
461 void AliShuttle::SendMLInfo()
464 // sends ML information about the current status of the current detector being processed
467 AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
470 AliError("UNEXPECTED: status could not be read from current CDB entry");
474 TMonaLisaText mlStatus(Form("%s_status", fCurrentDetector.Data()), status->GetStatusName());
475 TMonaLisaValue mlRetryCount(Form("%s_count", fCurrentDetector.Data()), status->GetCount());
// the list holds pointers to the two stack objects above
478 mlList.Add(&mlStatus);
479 mlList.Add(&mlRetryCount);
481 fMonaLisa->SendParameters(&mlList);
484 //______________________________________________________________________________________________
// Decides whether the current detector (fCurrentDetector) should be processed for the
// current run. It checks, in order: whether this host processes the detector, whether a
// preprocessor is registered, the detector's state in the Shuttle logbook, the strict
// run-ordering constraint, and finally the AliShuttleStatus stored in the local CDB
// (first-time processing, already done/failed, store-failed retry, restart with retry
// counting against fConfig->GetMaxRetries()).
485 Bool_t AliShuttle::ContinueProcessing()
487 // this function reads the AliShuttleStatus information from CDB and
488 // checks if the processing should be continued
489 // if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
491 if (!fConfig->HostProcessDetector(fCurrentDetector)) return kFALSE;
493 AliPreprocessor* aPreprocessor =
494 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
497 AliInfo(Form("%s: no preprocessor registered", fCurrentDetector.Data()));
501 AliShuttleLogbookEntry::Status entryStatus =
502 fLogbookEntry->GetDetectorStatus(fCurrentDetector);
504 if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
505 AliInfo(Form("ContinueProcessing - %s is %s",
506 fCurrentDetector.Data(),
507 fLogbookEntry->GetDetectorStatusName(entryStatus)));
511 // if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state
513 // check if current run is first unprocessed run for current detector
514 if (fConfig->StrictRunOrder(fCurrentDetector) &&
515 !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
// the format string has one %s conversion, so the detector name must be supplied
517 Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering but this is not the first unprocessed run!", fCurrentDetector.Data()));
521 AliShuttleStatus* status = ReadShuttleStatus();
// no stored status: first processing attempt, write a fresh kStarted status
524 Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
525 fCurrentDetector.Data()));
526 status = new AliShuttleStatus(AliShuttleStatus::kStarted);
527 return WriteShuttleStatus(status);
530 // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
531 // If it happens it may mean Logbook updating failed... let's do it now!
532 if (status->GetStatus() == AliShuttleStatus::kDone ||
533 status->GetStatus() == AliShuttleStatus::kFailed){
534 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
535 fCurrentDetector.Data(),
536 status->GetStatusName(status->GetStatus())));
537 UpdateShuttleLogbook(fCurrentDetector.Data(),
538 status->GetStatusName(status->GetStatus()));
// a previous Grid store failed: retry the store instead of re-running the preprocessor
542 if (status->GetStatus() == AliShuttleStatus::kStoreFailed) {
544 Form("ContinueProcessing - %s: Grid storage of one or more objects failed. Trying again now",
545 fCurrentDetector.Data()));
546 if(TryToStoreAgain()){
547 Log(fCurrentDetector.Data(), "ContinueProcessing - All objects successfully stored into OCDB");
548 UpdateShuttleStatus(AliShuttleStatus::kDone);
549 UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE");
552 Form("ContinueProcessing - %s: Grid storage failed again",
553 fCurrentDetector.Data()));
554 // trigger ML information manually because we did not have a status change
560 // if we get here, there is a restart
561 Bool_t cont = kFALSE;
// give up once the stored retry count reaches the configured maximum
564 if (status->GetCount() >= fConfig->GetMaxRetries()) {
565 Log("SHUTTLE", Form("ContinueProcessing - %s failed %d times in status %s - "
566 "Updating Shuttle Logbook", fCurrentDetector.Data(),
567 status->GetCount(), status->GetStatusName()));
568 UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
569 UpdateShuttleStatus(AliShuttleStatus::kFailed);
// otherwise restart and increase the retry count
571 Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. "
572 "Aborted before with %s. Retry number %d.", fCurrentDetector.Data(),
573 status->GetStatusName(), status->GetCount()));
574 UpdateShuttleStatus(AliShuttleStatus::kStarted, kTRUE);
578 // Send mail to detector expert!
579 AliInfo(Form("Sending mail to %s expert...", fCurrentDetector.Data()));
581 Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert",
582 fCurrentDetector.Data()));
587 //______________________________________________________________________________________________
// Processes one run described by a Shuttle logbook entry. For every configured detector
// that ContinueProcessing() accepts, the detector is handled in a separate client process
// while this process polls it with waitpid() and enforces fConfig->GetPPTimeOut().
// Afterwards the Shuttle logbook is re-queried to update the "shuttle_done" flag and the
// fFirstUnprocessed flags. The final return value is (hasError == kFALSE).
588 Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
591 // Makes data retrieval for all detectors in the configuration.
592 // entry: Shuttle logbook entry, contains run parameters and status of detectors
593 // (Unprocessed, Inactive, Failed or Done).
594 // Returns kFALSE in case an error occurred and kTRUE otherwise
597 if(!entry) return kFALSE;
599 fLogbookEntry = entry;
601 if (fLogbookEntry->IsDone())
603 Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook");
604 UpdateShuttleLogbook("shuttle_done");
609 // create ML instance that monitors this run
610 fMonaLisa = new TMonaLisaWriter(Form("%d", GetCurrentRun()), "SHUTTLE", "aliendb1.cern.ch");
611 // disable monitoring of other parameters that come e.g. from TFile
612 gMonitoringWriter = 0;
614 AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^* \n",
618 // Send the information to ML
619 TMonaLisaText mlStatus("SHUTTLE_status", "Processing");
622 mlList.Add(&mlStatus);
624 fMonaLisa->SendParameters(&mlList);
626 fLogbookEntry->Print("all");
629 Bool_t hasError = kFALSE;
// reset the "FXS was called" flags for the first three systems
630 for(Int_t iSys=0;iSys<3;iSys++) fFXSCalled[iSys]=kFALSE;
// query both main storages for the current run (only if they could be activated)
632 AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
633 if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
634 AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
635 if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
637 // Loop on detectors in the configuration
638 TIter iter(fConfig->GetDetectors());
639 TObjString* aDetector = 0;
641 while ((aDetector = (TObjString*) iter.Next()))
643 fCurrentDetector = aDetector->String();
645 if (ContinueProcessing() == kFALSE) continue;
647 AliInfo(Form("\n\n \t\t\t****** run %d - %s: START ******",
648 GetCurrentRun(), aDetector->GetName()));
650 Log(fCurrentDetector.Data(), "Starting processing");
// NOTE(review): pid presumably comes from a fork() call that is not visible here - confirm
656 Log("SHUTTLE", "ERROR: Forking failed");
// parent branch: poll the client once per second until it terminates or times out
661 AliInfo(Form("In parent process of %d - %s: Starting monitoring",
662 GetCurrentRun(), aDetector->GetName()));
664 Long_t begin = time(0);
666 int status; // to be used with waitpid, on purpose an int (not Int_t)!
// WNOHANG: waitpid returns 0 while the client is still running
667 while (waitpid(pid, &status, WNOHANG) == 0)
669 Long_t expiredTime = time(0) - begin;
671 if (expiredTime > fConfig->GetPPTimeOut())
673 Log("SHUTTLE", Form("Process time out. Run time: %d seconds. Killing...",
680 gSystem->Sleep(1000);
// log a progress message once a minute
684 if (expiredTime % 60 == 0)
685 Log("SHUTTLE", Form("Checked process. Run time: %d seconds.",
687 gSystem->Sleep(1000);
691 AliInfo(Form("In parent process of %d - %s: Client has terminated.",
692 GetCurrentRun(), aDetector->GetName()));
694 if (WIFEXITED(status))
696 Int_t returnCode = WEXITSTATUS(status);
698 Log("SHUTTLE", Form("The return code is %d", returnCode));
// client branch: run the detector processing and exit with a return code
707 AliInfo(Form("In client process of %d - %s", GetCurrentRun(), aDetector->GetName()));
709 UInt_t result = ProcessCurrentDetector();
711 Int_t returnCode = 0; // will be set to 1 in case of an error
716 AliInfo(Form("\n \t\t\t****** run %d - %s: PREPROCESSOR ERROR ****** \n\n",
717 GetCurrentRun(), aDetector->GetName()));
719 else if (result == 2)
721 AliInfo(Form("\n \t\t\t****** run %d - %s: STORAGE ERROR ****** \n\n",
722 GetCurrentRun(), aDetector->GetName()));
725 AliInfo(Form("\n \t\t\t****** run %d - %s: DONE ****** \n\n",
726 GetCurrentRun(), aDetector->GetName()));
731 // Process successful: Update time_processed field in FXS logbooks!
732 if (fFXSCalled[kDAQ])
734 if (UpdateDAQTable() == kFALSE)
736 fFXSlist[kDAQ].Clear();
738 //if(fFXSCalled[kDCS]) {
739 // if (UpdateDCSTable(aDetector->GetName()) == kFALSE)
741 // fFXSlist[kDCS].Clear();
743 if (fFXSCalled[kHLT])
745 if (UpdateHLTTable() == kFALSE)
747 fFXSlist[kHLT].Clear();
751 AliInfo(Form("Client process of %d - %s is exiting now with %d.",
752 GetCurrentRun(), aDetector->GetName(), returnCode));
754 // the client exits here
755 gSystem->Exit(returnCode);
757 AliError("We should never get here!!!");
761 AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^* \n",
764 //check if shuttle is done for this run, if so update logbook
765 TObjArray checkEntryArray;
766 checkEntryArray.SetOwner(1);
767 TString whereClause = Form("where run=%d", GetCurrentRun());
768 if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || checkEntryArray.GetEntries() == 0) {
769 Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!",
771 return hasError == kFALSE;
774 AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*>
775 (checkEntryArray.At(0));
779 if (checkEntry->IsDone())
781 Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook");
782 UpdateShuttleLogbook("shuttle_done");
// detectors still unprocessed for this run lose their "first unprocessed" flag
786 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
788 if (checkEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
790 AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
791 checkEntry->GetRun(), GetDetName(iDet)));
792 fFirstUnprocessed[iDet] = kFALSE;
798 // remove ML instance
804 return hasError == kFALSE;
807 //______________________________________________________________________________________________
808 UInt_t AliShuttle::ProcessCurrentDetector()
811 // Makes data retrieval just for a specific detector (fCurrentDetector).
812 // Threre should be a configuration for this detector.
814 AliInfo(Form("Retrieving values for %s, run %d", fCurrentDetector.Data(), GetCurrentRun()));
816 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
821 Bool_t aDCSError = kFALSE;
824 // TODO Test only... I've added a flag that allows to
825 // exclude DCS archive DB query
828 AliInfo("Skipping DCS processing!");
831 TString host(fConfig->GetDCSHost(fCurrentDetector));
832 Int_t port = fConfig->GetDCSPort(fCurrentDetector);
834 // Retrieval of Aliases
835 TObjString* anAlias = 0;
837 Int_t nTotAliases= ((TMap*)fConfig->GetDCSAliases(fCurrentDetector))->GetEntries();
838 TIter iterAliases(fConfig->GetDCSAliases(fCurrentDetector));
839 while ((anAlias = (TObjString*) iterAliases.Next()))
841 TObjArray *valueSet = new TObjArray();
842 valueSet->SetOwner(1);
844 if (((iAlias-1) % 500) == 0 || iAlias == nTotAliases)
845 AliInfo(Form("Querying DCS archive: alias %s (%d of %d)",
846 anAlias->GetName(), iAlias++, nTotAliases));
847 aDCSError = (GetValueSet(host, port, anAlias->String(), valueSet, kAlias) == 0);
851 dcsMap.Add(anAlias->Clone(), valueSet);
853 Log(fCurrentDetector,
854 Form("ProcessCurrentDetector - Error while retrieving alias %s",
855 anAlias->GetName()));
856 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
862 // Retrieval of Data Points
865 Int_t nTotDPs= ((TMap*)fConfig->GetDCSDataPoints(fCurrentDetector))->GetEntries();
866 TIter iterDP(fConfig->GetDCSDataPoints(fCurrentDetector));
867 while ((aDP = (TObjString*) iterDP.Next()))
869 TObjArray *valueSet = new TObjArray();
870 valueSet->SetOwner(1);
871 if (((iDP-1) % 500) == 0 || iDP == nTotDPs)
872 AliInfo(Form("Querying DCS archive: DP %s (%d of %d)",
873 aDP->GetName(), iDP++, nTotDPs));
874 aDCSError = (GetValueSet(host, port, aDP->String(), valueSet, kDP) == 0);
878 dcsMap.Add(aDP->Clone(), valueSet);
880 Log(fCurrentDetector,
881 Form("ProcessCurrentDetector - Error while retrieving data point %s",
883 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
890 // DCS Archive DB processing successful. Call Preprocessor!
891 UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
893 AliPreprocessor* aPreprocessor =
894 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
896 aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
897 UInt_t aPPResult = aPreprocessor->Process(&dcsMap);
899 UInt_t returnValue = 0;
900 if (aPPResult == 0) { // Preprocessor error
901 UpdateShuttleStatus(AliShuttleStatus::kPPError);
903 } else if (fGridError == kFALSE) { // process and Grid storage ok!
904 UpdateShuttleStatus(AliShuttleStatus::kDone);
905 UpdateShuttleLogbook(fCurrentDetector, "DONE");
906 Log(fCurrentDetector.Data(),
907 "ProcessCurrentDetector - Preprocessor and Grid storage ended successfully");
909 } else { // Grid storage error (process ok, but object put in local storage)
910 UpdateShuttleStatus(AliShuttleStatus::kStoreFailed);
919 //______________________________________________________________________________________________
// Runs "select * from logbook_shuttle <whereClause> order by run" on fServer[3] and, for
// every returned row, builds a logbook entry via QueryRunParameters(run), fills in the
// per-column detector status and appends the entry to `entries`.
// Returns kFALSE when Connect(3) fails.
920 Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
923 // Query DAQ's Shuttle logbook and fills detector status object.
924 // Call QueryRunParameters to query DAQ logbook for run parameters.
928 // check connection, in case connect
929 if(!Connect(3)) return kFALSE;
// NOTE(review): whereClause is spliced into the SQL text unchanged; the caller visible here builds it from a run number
932 sqlQuery = Form("select * from logbook_shuttle %s order by run", whereClause);
934 TSQLResult* aResult = fServer[3]->Query(sqlQuery);
936 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
940 AliDebug(2,Form("Query = %s", sqlQuery.Data()));
942 if(aResult->GetRowCount() == 0) {
943 // if(sqlQuery.EndsWith("where shuttle_done=0 order by run")){
944 // Log("SHUTTLE", "QueryShuttleLogbook - All runs in Shuttle Logbook are already DONE");
948 AliInfo("No entries in Shuttle Logbook match request");
954 // TODO Check field count!
// the result is expected to have exactly 22 columns
955 const UInt_t nCols = 22;
956 if (aResult->GetFieldCount() != (Int_t) nCols) {
957 AliError("Invalid SQL result field number!");
963 while ((aRow = aResult->Next())) {
// column 0 holds the run number
964 TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
965 Int_t run = runString.Atoi();
967 AliShuttleLogbookEntry *entry = QueryRunParameters(run);
// pass every column (name, value) to the entry
972 for(UInt_t ii = 0; ii < nCols; ii++)
973 entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii));
975 entries.AddLast(entry);
979 // if(sqlQuery.EndsWith("where shuttle_done=0 order by run"))
980 // Log("SHUTTLE", Form("QueryShuttleLogbook - Found %d unprocessed runs in Shuttle Logbook",
981 // entries.GetEntriesFast()));
986 //______________________________________________________________________________________________
// Reads the row for `run` from the DAQ logbook table (fConfig->GetDAQlbTable()) through
// fServer[3] and copies every column into a new AliShuttleLogbookEntry.
// Zero rows, more than one row, an unreadable row and an invalid start/end time are
// each reported through the log.
987 AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
990 // Retrieve run parameters written in the DAQ logbook and sets them into AliShuttleLogbookEntry object
993 // check connection, in case connect
998 sqlQuery.Form("select * from %s where run=%d", fConfig->GetDAQlbTable(), run);
1000 TSQLResult* aResult = fServer[3]->Query(sqlQuery);
1002 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
// exactly one row is expected for a run
1006 if (aResult->GetRowCount() == 0) {
1007 Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run));
1012 if (aResult->GetRowCount() > 1) {
1013 AliError(Form("More than one entry in DAQ Logbook for run %d. Skipping", run));
1018 TSQLRow* aRow = aResult->Next();
1021 AliError(Form("Could not retrieve row for run %d. Skipping", run));
1026 AliShuttleLogbookEntry* entry = new AliShuttleLogbookEntry(run);
// every column becomes a run parameter keyed by its field name
1028 for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
1029 entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
1031 UInt_t startTime = entry->GetStartTime();
1032 UInt_t endTime = entry->GetEndTime();
// both times must be non-zero and start must not be after end
1034 if (!startTime || !endTime || startTime > endTime) {
1036 Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d",
1037 run, startTime, endTime));
1050 //______________________________________________________________________________________________
// Retries the Grid upload for both storages. Both retries are always attempted; the
// result is kTRUE only if the OCDB retry and the reference-data retry both succeed.
1051 Bool_t AliShuttle::TryToStoreAgain()
1053 // Called in case the detector failed to store the object in Grid OCDB
1054 // It tries to store the object again, if it does not find more recent and overlapping objects
1055 // Calls underlying TryToStoreAgain(TString&) function twice, for OCDB and Reference storage.
1057 AliInfo("Trying to store OCDB data again...");
1058 Bool_t resultCDB = TryToStoreAgain(fgkMainCDB);
1060 AliInfo("Trying to store reference data again...");
1061 Bool_t resultRef = TryToStoreAgain(fgkMainRefStorage);
1063 return resultCDB && resultRef;
1066 //______________________________________________________________________________________________
// Retries the upload of objects kept in the backup storage that belongs to gridURI
// (fgkLocalCDB for fgkMainCDB, fgkLocalRefStorage for fgkMainRefStorage; any other URI
// is reported as invalid). Each local entry of the current detector and run is compared
// with the Grid storage's queried id list before it is put into the Grid storage.
1067 Bool_t AliShuttle::TryToStoreAgain(TString& gridURI)
1069 // Called by TryToStoreAgain(), performs actual storage retry
1071 TObjArray* gridIds=0;
1073 Bool_t result = kTRUE;
1075 const char* type = 0;
// choose the backup storage matching the requested Grid storage
1077 if(gridURI == fgkMainCDB) {
1079 backupURI = fgkLocalCDB;
1080 } else if(gridURI == fgkMainRefStorage) {
1082 backupURI = fgkLocalRefStorage;
1084 AliError(Form("Invalid storage URI: %s", gridURI.Data()));
1088 AliCDBManager* man = AliCDBManager::Instance();
1090 AliCDBStorage *gridSto = man->GetStorage(gridURI);
1092 Log(fCurrentDetector.Data(),
1093 Form("TryToStoreAgain - cannot activate main %s storage", type));
1097 gridIds = gridSto->GetQueryCDBList();
1099 // get objects previously stored in local CDB
1100 AliCDBStorage *backupSto = man->GetStorage(backupURI);
1101 AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*");
1102 // Local objects were stored with current run as Grid version!
1103 TList* localEntries = backupSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun());
1104 localEntries->SetOwner(1);
1106 // loop on local stored objects
1107 TIter localIter(localEntries);
1108 AliCDBEntry *aLocEntry = 0;
1109 while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){
1110 aLocEntry->SetOwner(1);
// keep a copy of the local id before version/subversion are reset to -1 on the entry
1111 AliCDBId aLocId = aLocEntry->GetId();
1112 aLocEntry->SetVersion(-1);
1113 aLocEntry->SetSubVersion(-1);
1115 // loop on Grid valid Id's
1116 Bool_t store = kTRUE;
1117 TIter gridIter(gridIds);
1118 AliCDBId* aGridId = 0;
1119 while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
1120 // If local object is valid up to infinity we store it only if it is
1121 // the first unprocessed run!
1122 if (aLocId.GetLastRun() == AliCDBRunRange::Infinity())
1124 if (!fFirstUnprocessed[GetDetPos(fCurrentDetector)])
1126 Log(fCurrentDetector.Data(),
1127 ("TryToStoreAgain - This object has validity infinite but "
1128 "there are previous unprocessed runs!"));
// only Grid ids with the same path as the local object are relevant
1134 if(aGridId->GetPath() != aLocId.GetPath()) continue;
1135 // skip all objects valid up to infinity
1136 if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
1137 // if we get here, it means there's already some more recent object stored on Grid!
1143 Log(fCurrentDetector.Data(),
1144 Form("TryToStoreAgain - A more recent object already exists in %s storage: <%s>",
1145 type, aGridId->ToString().Data()));
1146 // removing local filename...
1147 // TODO maybe it's better not to remove it, it was not copied to the Grid!
1149 backupSto->IdToFilename(aLocId, filename);
1150 AliInfo(Form("Removing local file %s", filename.Data()));
// NOTE(review): the file is removed through a shell "rm" with an unquoted path
1151 gSystem->Exec(Form("rm %s",filename.Data()));
1155 // If we get here, the file can be stored!
1156 Bool_t storeOk = gridSto->Put(aLocEntry);
1158 Log(fCurrentDetector.Data(),
1159 Form("TryToStoreAgain - Object <%s> successfully put into %s storage",
1160 aLocId.ToString().Data(), type));
1162 // removing local filename...
1164 backupSto->IdToFilename(aLocId, filename);
1165 AliInfo(Form("Removing local file %s", filename.Data()));
1166 gSystem->Exec(Form("rm %s", filename.Data()));
1169 Log(fCurrentDetector.Data(),
1170 Form("TryToStoreAgain - Grid %s storage of object <%s> failed again",
1171 type, aLocId.ToString().Data()));
1175 localEntries->Clear();
1180 //______________________________________________________________________________________________
// Opens an AliDCSClient to host:port (with fTimeout and fRetries) and requests
// the values of `entry` between GetCurrentStartTime() and GetCurrentEndTime(),
// through GetAliasValues() or GetDPValues(). A failure is logged for the current
// detector together with AliDCSClient::GetErrorString(result); for
// fgkServerError the server's own message is logged as well.
// NOTE(review): the conditions selecting on `type` and the return statements fall
// in numbering gaps of this listing and are not visible.
1181 Bool_t AliShuttle::GetValueSet(const char* host, Int_t port, const char* entry,
1182 TObjArray* valueSet, DCSType type)
1184 // Retrieve all "entry" data points from the DCS server
1185 // host, port: TSocket connection parameters
1186 // entry: name of the alias or data point
1187 // valueSet: array of retrieved AliDCSValue's
1188 // type: kAlias or kDP
1190 AliDCSClient client(host, port, fTimeout, fRetries);
1191 if (!client.IsConnected())
// Query by alias name ...
1200 result = client.GetAliasValues(entry,
1201 GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
// ... or by data point name.
1205 result = client.GetDPValues(entry,
1206 GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1211 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get '%s'! Reason: %s",
1212 entry, AliDCSClient::GetErrorString(result)));
1214 if (result == AliDCSClient::fgkServerError)
1216 Log(fCurrentDetector.Data(), Form("GetValueSet - Server error: %s",
1217 client.GetServerError().Data()));
1226 //______________________________________________________________________________________________
// Dispatcher: forwards (detector, id, source) to GetDAQFileName, GetDCSFileName
// or GetHLTFileName and returns their result; an unrecognised `system` value is
// reported through AliError.
// NOTE(review): the selecting construct (and what is returned after the error)
// is in numbering gaps of this listing and is not visible.
1227 const char* AliShuttle::GetFile(Int_t system, const char* detector,
1228 const char* id, const char* source)
1230 // Get calibration file from file exchange servers
1231 // calls specific getter according to system index (kDAQ, kDCS, kHLT)
1235 return GetDAQFileName(detector, id, source);
1238 return GetDCSFileName(detector, id, source);
1241 return GetHLTFileName(detector, id, source);
1244 AliError(Form("No valid system index: %d",system));
1250 //______________________________________________________________________________________________
// Dispatcher: forwards (detector, id) to GetDAQFileSources, GetDCSFileSources or
// GetHLTFileSources and returns their result; an unrecognised `system` value is
// reported through AliError.
// NOTE(review): the selecting construct (and what is returned after the error)
// is in numbering gaps of this listing and is not visible.
1251 TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
1253 // Get sources producing the condition file Id from file exchange servers
1254 // calls specific getter according to system index (kDAQ, kDCS, kHLT)
1258 return GetDAQFileSources(detector, id);
1261 return GetDCSFileSources(detector, id);
1264 return GetHLTFileSources(detector, id);
1267 AliError(Form("No valid system index: %d",system));
1273 //______________________________________________________________________________________________
// Makes sure fServer[system] holds a live TSQLServer connection and returns
// kTRUE at once when it already does. For system < 3 the FXS database settings
// of that system are read from fConfig; otherwise the DAQ logbook settings are
// used. After connecting, GetTables(dbName) is called on the matching server
// (index 3 is the logbook connection; the kDCS call is commented out).
// NOTE(review): the conditions around the error messages and the GetTables
// calls, and the remaining returns, are in numbering gaps of this listing.
1274 Bool_t AliShuttle::Connect(Int_t system)
1276 // Connect to MySQL Server of the system's FXS MySQL databases
1277 // DAQ Logbook, Shuttle Logbook and DAQ FXS db are on the same host
1279 // check connection: if already connected return
1280 if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
1282 TString dbHost, dbUser, dbPass, dbName;
1284 if (system < 3) // FXS db servers
1286 dbHost = Form("mysql://%s:%d", fConfig->GetFXSdbHost(system), fConfig->GetFXSdbPort(system));
1287 dbUser = fConfig->GetFXSdbUser(system);
1288 dbPass = fConfig->GetFXSdbPass(system);
1289 dbName = fConfig->GetFXSdbName(system);
1290 } else { // Run & Shuttle logbook servers
1291 // TODO Will the Shuttle logbook server be the same as the Run logbook server ???
1292 dbHost = Form("mysql://%s:%d", fConfig->GetDAQlbHost(), fConfig->GetDAQlbPort());
1293 dbUser = fConfig->GetDAQlbUser();
1294 dbPass = fConfig->GetDAQlbPass();
1295 dbName = fConfig->GetDAQlbDB();
// Connect() may hand back a null pointer or an unconnected server; both count as failure.
1298 fServer[system] = TSQLServer::Connect(dbHost.Data(), dbUser.Data(), dbPass.Data());
1299 if (!fServer[system] || !fServer[system]->IsConnected()) {
1302 AliError(Form("Can't establish connection to FXS database for %s",
1303 AliShuttleInterface::GetSystemName(system)));
1305 AliError("Can't establish connection to Run logbook.");
// Drop the unusable server object.
// NOTE(review): no reset of fServer[system] is visible after the delete - confirm
// it is nulled in the elided lines.
1307 if(fServer[system]) delete fServer[system];
1312 // TODO in the configuration should the table name be there too?
1313 TSQLResult* aResult=0;
1316 aResult = fServer[kDAQ]->GetTables(dbName.Data());
1319 //aResult = fServer[kDCS]->GetTables(dbName.Data());
1322 aResult = fServer[kHLT]->GetTables(dbName.Data());
1325 aResult = fServer[3]->GetTables(dbName.Data());
1333 //______________________________________________________________________________________________
// Looks up the FXS path of one DAQ file of the current run and copies the file
// into the Shuttle temp directory as DAQ_<detector>_<run>_<id>_<source>.shuttle.
// After the copy, fFXSCalled[kDAQ] is set and the "<id>#!?!#<source>" pair is
// appended to fFXSlist[kDAQ].
// The returned pointer refers to a function-local static TString, so it is only
// valid until the next call of this function.
// NOTE(review): detector, id and source are pasted into the SQL text unescaped.
// NOTE(review): the condition lines, several Log arguments and the failure
// returns fall in numbering gaps of this listing and are not visible.
1334 const char* AliShuttle::GetDAQFileName(const char* detector, const char* id, const char* source)
1336 // Retrieves a file from the DAQ FXS.
1337 // First queries the DAQ FXS database for the DAQ file name, using the run, detector, id and source info
1338 // then calls RetrieveDAQFile(DAQfilename) for actual copy to local disk
1339 // run: current run being processed (given by Logbook entry fLogbookEntry)
1340 // detector: the Preprocessor name
1341 // id: provided as a parameter by the Preprocessor
1342 // source: provided by the Preprocessor through GetFileSources function
1344 // check connection, in case connect
1347 Log(detector, "GetDAQFileName - Couldn't connect to DAQ FXS database");
1351 // Query preparation
1352 TString sqlQueryStart = Form("select filePath from %s where", fConfig->GetFXSdbTable(kDAQ));
1353 TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\"",
1354 GetCurrentRun(), detector, id, source);
1355 TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1357 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1360 TSQLResult* aResult = 0;
1361 aResult = dynamic_cast<TSQLResult*> (fServer[kDAQ]->Query(sqlQuery));
1363 Log(detector, Form("GetDAQFileName - Can't execute SQL query for: id = %s, source = %s",
// Zero rows and more than one row are both logged as problems.
1368 if(aResult->GetRowCount() == 0)
1371 Form("GetDAQFileName - No entry in FXS table for: id = %s, source = %s",
1377 if (aResult->GetRowCount() > 1) {
1379 Form("GetDAQFileName - More than one entry in FXS table for: id = %s, source = %s",
1385 TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
1388 Log(detector, Form("GetDAQFileName - Empty set result from query: id = %s, source = %s",
// Build the string with an explicit length taken from GetFieldLength(0).
1394 TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
1399 AliDebug(2, Form("filePath = %s",filePath.Data()));
1401 // retrieved file is renamed to make it unique
1402 TString localFileName = Form("DAQ_%s_%d_%s_%s.shuttle",
1403 detector, GetCurrentRun(), id, source);
1405 // file retrieval from DAQ FXS
1406 Bool_t result = RetrieveDAQFile(filePath.Data(), localFileName.Data());
1408 Log(detector, Form("GetDAQFileName - Copy of file %s from DAQ FXS failed", filePath.Data()));
1411 AliInfo(Form("File %s copied from DAQ FXS into %s/%s",
1412 filePath.Data(), GetShuttleTempDir(), localFileName.Data()));
// Remember which (id, source) pairs were fetched; UpdateDAQTable() iterates over this list.
1415 fFXSCalled[kDAQ]=kTRUE;
1416 TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, source));
1417 fFXSlist[kDAQ].Add(fileParams);
// Static storage keeps the returned character buffer alive after this function returns.
1419 static TString fullLocalFileName;
1420 fullLocalFileName = TString::Format("%s/%s", GetShuttleTempDir(), localFileName.Data());
1422 AliInfo(Form("fullLocalFileName = %s", fullLocalFileName.Data()));
1424 return fullLocalFileName.Data();
1428 //______________________________________________________________________________________________
// Copies daqFileName from the DAQ FXS host into the Shuttle temp directory with
// `scp`, creating that directory first if it cannot be opened. The copy is
// attempted at most 3 times and kTRUE is returned as soon as one attempt exits
// with status 0.
// NOTE(review): the condition on `dir`, two scp arguments and the failure
// return fall in numbering gaps of this listing and are not visible.
1429 Bool_t AliShuttle::RetrieveDAQFile(const char* daqFileName, const char* localFileName)
1431 // Copies file from DAQ FXS to local Shuttle machine
1433 // check temp directory: trying to cd to temp; if it does not exist, create it
1434 AliDebug(2, Form("Copy file %s from DAQ FXS into %s/%s",
1435 daqFileName, GetShuttleTempDir(), localFileName));
1437 void* dir = gSystem->OpenDirectory(GetShuttleTempDir());
// A non-zero result from mkdir is treated as failure here.
1439 if (gSystem->mkdir(GetShuttleTempDir(), kTRUE)) {
1440 AliError(Form("Can't open directory <%s>", GetShuttleTempDir()));
1445 gSystem->FreeDirectory(dir);
// Remote files are addressed relative to the "FES" folder on the FXS host.
1448 TString baseDAQFXSFolder = "FES";
1449 TString command = Form("scp -oPort=%d -2 %s@%s:%s/%s %s/%s",
1450 fConfig->GetFXSPort(kDAQ),
1451 fConfig->GetFXSUser(kDAQ),
1452 fConfig->GetFXSHost(kDAQ),
1453 baseDAQFXSFolder.Data(),
1455 GetShuttleTempDir(),
1458 AliDebug(2, Form("%s",command.Data()));
1460 UInt_t nRetries = 0;
1461 UInt_t maxRetries = 3;
1463 // copy!! if successful TSystem::Exec returns 0
1464 while(nRetries++ < maxRetries) {
1465 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
1466 if(gSystem->Exec(command.Data()) == 0) return kTRUE;
1473 //______________________________________________________________________________________________
// Queries the DAQ FXS table for the DAQsource values recorded for (current run,
// detector, id) and collects each one as a new TObjString in a newly allocated
// TList.
// NOTE(review): detector and id are pasted into the SQL text unescaped.
// NOTE(review): the connection check, the failure returns and the final return
// fall in numbering gaps of this listing; presumably `list` is returned and the
// caller owns it - confirm.
1474 TList* AliShuttle::GetDAQFileSources(const char* detector, const char* id)
1476 // Retrieves list of DAQ sources of file Id
1478 // check connection, in case connect
1480 Log(detector, "GetDAQFileSources - Couldn't connect to DAQ FXS database");
1484 // Query preparation
1485 TString sqlQueryStart = Form("select DAQsource from %s where", fConfig->GetFXSdbTable(kDAQ));
1486 TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1487 GetCurrentRun(), detector, id);
1488 TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1490 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1493 TSQLResult* aResult;
1494 aResult = fServer[kDAQ]->Query(sqlQuery);
1496 Log(detector, Form("GetDAQFileSources - Can't execute SQL query for id: %s", id));
1500 if (aResult->GetRowCount() == 0) {
1502 Form("GetDAQFileSources - No entry in FXS table for id: %s", id));
1508 TList *list = new TList();
// One list element per result row, built from the first field of the row.
1511 while((aRow = aResult->Next())){
1513 TString daqSource(aRow->GetField(0), aRow->GetFieldLength(0));
1514 AliDebug(2, Form("daqSource = %s", daqSource.Data()));
1515 list->Add(new TObjString(daqSource));
1524 //______________________________________________________________________________________________
// Placeholder for DCS FXS retrieval: all three parameters are unnamed (unused)
// and a fixed string literal is returned instead of a file name.
1525 const char* AliShuttle::GetDCSFileName(const char* /*detector*/, const char* /*id*/, const char* /*source*/){
1526 // Retrieves a file from the DCS FXS.
1528 return "You're in DCS";
1532 //______________________________________________________________________________________________
// DCS counterpart of the file-source getters; both parameters are unnamed (unused).
// NOTE(review): the statements of this function fall in a numbering gap of this
// listing, so its return value is not visible here.
1533 TList* AliShuttle::GetDCSFileSources(const char* /*detector*/, const char* /*id*/){
1534 // Retrieves file sources from the DCS FXS.
1540 //______________________________________________________________________________________________
// Looks up path, size and checksum of one HLT file of the current run in the HLT
// FXS table, copies the file into the Shuttle temp directory as
// HLT_<detector>_<run>_<id>_<source>.shuttle and checks its md5sum against the
// checksum stored in the table. Afterwards fFXSCalled[kHLT] is set and the
// "<id>#!?!#<source>" pair is appended to fFXSlist[kHLT].
// The returned pointer refers to a function-local static TString, so it is only
// valid until the next call of this function.
// NOTE(review): detector, id and source are pasted into the SQL text unescaped.
// NOTE(review): the condition lines, several Log arguments and the failure
// returns fall in numbering gaps of this listing and are not visible.
1541 const char* AliShuttle::GetHLTFileName(const char* detector, const char* id, const char* source){
1542 // Retrieves a file from the HLT FXS.
1543 // First queries the HLT FXS database for the HLT file name, using the run, detector, id and source info
1544 // then calls RetrieveHLTFile(filePath, localFileName) for actual copy to local disk
1545 // run: current run being processed (given by Logbook entry fLogbookEntry)
1546 // detector: the Preprocessor name
1547 // id: provided as a parameter by the Preprocessor
1548 // source: provided by the Preprocessor through GetFileSources function
1550 // check connection, in case connect
1553 Log(detector, "GetHLTFileName - Couldn't connect to HLT FXS database");
1557 // Query preparation
1558 TString sqlQueryStart = Form("select filePath,fileSize,fileChecksum from %s where",
1559 fConfig->GetFXSdbTable(kHLT));
1560 TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\"",
1561 GetCurrentRun(), detector, id, source);
1562 TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1564 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1567 TSQLResult* aResult = 0;
1568 aResult = dynamic_cast<TSQLResult*> (fServer[kHLT]->Query(sqlQuery));
1570 Log(detector, Form("GetHLTFileName - Can't execute SQL query for: id = %s, source = %s",
// Zero rows and more than one row are both logged as problems.
1575 if(aResult->GetRowCount() == 0)
1578 Form("GetHLTFileName - No entry in FXS table for: id = %s, source = %s",
1584 if (aResult->GetRowCount() > 1) {
1586 Form("GetHLTFileName - More than one entry in FXS table for: id = %s, source = %s",
// The select above asks for exactly three columns.
1592 if (aResult->GetFieldCount() != 3) {
1594 Form("GetHLTFileName - Wrong field count in FXS table for: id = %s, source = %s",
1600 TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
1603 Log(detector, Form("GetHLTFileName - Empty set result from query: id = %s, source = %s",
// Fields in select order: filePath, fileSize, fileChecksum.
1609 TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
1610 TString fileSize(aRow->GetField(1), aRow->GetFieldLength(1));
1611 TString fileMd5Sum(aRow->GetField(2), aRow->GetFieldLength(2));
1616 AliDebug(2, Form("filePath = %s",filePath.Data()));
1618 // The full file path in HLT FXS is runNb/DET/DDLnumber/filePath
1619 // TString fullFilePath = Form("%d/%s/%s/%s", GetCurrentRun(), detector, source, filePath.Data());
1621 // retrieved file is renamed to make it unique
1622 TString localFileName = Form("HLT_%s_%d_%s_%s.shuttle",
1623 detector, GetCurrentRun(), id, source);
1625 // file retrieval from HLT FXS
1626 Bool_t result = RetrieveHLTFile(filePath.Data(), localFileName.Data());
1629 Log(detector, Form("GetHLTFileName - Copy of file %s from HLT FXS failed", filePath.Data()));
1632 AliInfo(Form("File %s copied from HLT FXS into %s/%s",
1633 filePath.Data(), GetShuttleTempDir(), localFileName.Data()));
1636 // compare md5sum of local file with the one stored in the HLT DB
// The exit status of the pipeline is that of grep: 0 when the checksum text is found.
// NOTE(review): with "2>&1 > /dev/null" only stdout goes to /dev/null; stderr
// still reaches the original stdout. An empty fileMd5Sum would make grep match
// any line - confirm the table never stores an empty checksum.
1637 Int_t md5Comp = gSystem->Exec(Form("md5sum %s/%s |grep %s 2>&1 > /dev/null",
1638 GetShuttleTempDir(), localFileName.Data(), fileMd5Sum.Data()));
1642 Log(detector, Form("GetHLTFileName - md5sum of file %s does not match with local copy!", filePath.Data()));
// Remember which (id, source) pairs were fetched; UpdateHLTTable() iterates over this list.
1646 fFXSCalled[kHLT]=kTRUE;
1647 TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, source));
1648 fFXSlist[kHLT].Add(fileParams);
// Static storage keeps the returned character buffer alive after this function returns.
1650 static TString fullLocalFileName;
1651 fullLocalFileName = TString::Format("%s/%s", GetShuttleTempDir(), localFileName.Data());
1653 AliInfo(Form("fullLocalFileName = %s", fullLocalFileName.Data()));
1655 return fullLocalFileName.Data();
1659 //______________________________________________________________________________________________
// Copies hltFileName from the HLT FXS host into the Shuttle temp directory with
// `scp`, creating that directory first if it cannot be opened. The copy is
// attempted at most 3 times and kTRUE is returned as soon as one attempt exits
// with status 0.
// NOTE(review): the condition on `dir`, two scp arguments and the failure
// return fall in numbering gaps of this listing and are not visible.
1660 Bool_t AliShuttle::RetrieveHLTFile(const char* hltFileName, const char* localFileName)
1662 // Copies file from HLT FXS to local Shuttle machine
1664 // check temp directory: trying to cd to temp; if it does not exist, create it
1665 AliDebug(2, Form("Copy file %s from HLT FXS into %s/%s",
1666 hltFileName, GetShuttleTempDir(), localFileName));
1668 void* dir = gSystem->OpenDirectory(GetShuttleTempDir());
// A non-zero result from mkdir is treated as failure here.
1670 if (gSystem->mkdir(GetShuttleTempDir(), kTRUE)) {
1671 AliError(Form("Can't open directory <%s>", GetShuttleTempDir()));
1676 gSystem->FreeDirectory(dir);
// Remote files are addressed relative to "~" on the FXS host.
1679 TString baseHLTFXSFolder = "~";
1680 TString command = Form("scp -oPort=%d %s@%s:%s/%s %s/%s",
1681 fConfig->GetFXSPort(kHLT),
1682 fConfig->GetFXSUser(kHLT),
1683 fConfig->GetFXSHost(kHLT),
1684 baseHLTFXSFolder.Data(),
1686 GetShuttleTempDir(),
1689 AliDebug(2, Form("%s",command.Data()));
1691 UInt_t nRetries = 0;
1692 UInt_t maxRetries = 3;
1694 // copy!! if successful TSystem::Exec returns 0
1695 while(nRetries++ < maxRetries) {
1696 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
1697 if(gSystem->Exec(command.Data()) == 0) return kTRUE;
1704 //______________________________________________________________________________________________
// Queries the HLT FXS table for the DDLnumbers values recorded for (current run,
// detector, id) and collects each one as a new TObjString in a newly allocated
// TList.
// NOTE(review): detector and id are pasted into the SQL text unescaped.
// NOTE(review): the connection check, the failure returns and the final return
// fall in numbering gaps of this listing; presumably `list` is returned and the
// caller owns it - confirm.
1705 TList* AliShuttle::GetHLTFileSources(const char* detector, const char* id){
1706 // Retrieves list of HLT sources (DDLnumbers) of file Id
1708 // check connection, in case connect
1710 Log(detector, "GetHLTFileSources - Couldn't connect to HLT FXS database");
1714 // Query preparation
1715 TString sqlQueryStart = Form("select DDLnumbers from %s where", fConfig->GetFXSdbTable(kHLT));
1716 TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1717 GetCurrentRun(), detector, id);
1718 TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1720 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1723 TSQLResult* aResult;
1724 aResult = fServer[kHLT]->Query(sqlQuery);
1726 Log(detector, Form("GetHLTFileSources - Can't execute SQL query for id: %s", id));
1730 if (aResult->GetRowCount() == 0) {
1732 Form("GetHLTFileSources - No entry in FXS table for id: %s", id));
1738 TList *list = new TList();
// One list element per result row, built from the first field of the row.
1741 while((aRow = aResult->Next())){
1743 TString ddlNumbers(aRow->GetField(0), aRow->GetFieldLength(0));
1744 AliDebug(2, Form("DDLnumbers = %s", ddlNumbers.Data()));
1745 list->Add(new TObjString(ddlNumbers));
1754 //______________________________________________________________________________________________
// Stamps time_processed in the DAQ FXS table for every (fileId, DAQsource) pair
// recorded in fFXSlist[kDAQ] for the current run and detector. All rows get the
// same timestamp, taken once before the loop.
// NOTE(review): the connection check, the returns and any cleanup of aFXSarray /
// aResult fall in numbering gaps of this listing and are not visible.
1755 Bool_t AliShuttle::UpdateDAQTable()
1757 // Update DAQ table filling time_processed field in all rows corresponding to current run and detector
1759 // check connection, in case connect
1761 Log(fCurrentDetector, "UpdateDAQTable - Couldn't connect to DAQ FXS database");
1765 TTimeStamp now; // now
1767 // Loop on FXS list entries
1768 TIter iter(&fFXSlist[kDAQ]);
1769 TObjString *aFXSentry=0;
1770 while((aFXSentry = dynamic_cast<TObjString*> (iter.Next()))){
1771 TString aFXSentrystr = aFXSentry->String();
// Entries were stored as "<id>#!?!#<source>"; exactly two tokens are expected back.
1772 TObjArray *aFXSarray = aFXSentrystr.Tokenize("#!?!#");
1773 if(!aFXSarray || aFXSarray->GetEntries() != 2 ) {
1774 Log(fCurrentDetector, Form("UpdateDAQTable - error updating FXS entry. Check string: <%s>",
1775 aFXSentrystr.Data()));
1776 if(aFXSarray) delete aFXSarray;
1779 const char* fileId = ((TObjString*) aFXSarray->At(0))->GetName();
1780 const char* daqSource = ((TObjString*) aFXSarray->At(1))->GetName();
1781 TString whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
1782 GetCurrentRun(), fCurrentDetector.Data(), fileId, daqSource);
// TTimeStamp::GetSec() returns time_t, which does not match a %d conversion;
// pass it as long with %ld so the printf-style Form receives a matching argument.
1786 TString sqlQuery = Form("update %s set time_processed=%ld %s", fConfig->GetFXSdbTable(kDAQ),
1787 static_cast<long>(now.GetSec()), whereClause.Data());
1789 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1792 TSQLResult* aResult;
1793 aResult = dynamic_cast<TSQLResult*> (fServer[kDAQ]->Query(sqlQuery));
1795 Log(fCurrentDetector, Form("UpdateDAQTable - Can't execute SQL query <%s>", sqlQuery.Data()));
1804 //______________________________________________________________________________________________
// Stamps time_processed in the HLT FXS table for every (fileId, DDLnumbers) pair
// recorded in fFXSlist[kHLT] for the current run and detector. All rows get the
// same timestamp, taken once before the loop.
// NOTE(review): the connection check, the returns and any cleanup of aFXSarray /
// aResult fall in numbering gaps of this listing and are not visible.
1805 Bool_t AliShuttle::UpdateHLTTable()
1807 // Update HLT table filling time_processed field in all rows corresponding to current run and detector
1809 // check connection, in case connect
1811 Log(fCurrentDetector, "UpdateHLTTable - Couldn't connect to HLT FXS database");
1815 TTimeStamp now; // now
1817 // Loop on FXS list entries
1818 TIter iter(&fFXSlist[kHLT]);
1819 TObjString *aFXSentry=0;
1820 while((aFXSentry = dynamic_cast<TObjString*> (iter.Next()))){
1821 TString aFXSentrystr = aFXSentry->String();
// Entries were stored as "<id>#!?!#<source>"; exactly two tokens are expected back.
1822 TObjArray *aFXSarray = aFXSentrystr.Tokenize("#!?!#");
1823 if(!aFXSarray || aFXSarray->GetEntries() != 2 ) {
1824 Log(fCurrentDetector, Form("UpdateHLTTable - error updating FXS entry. Check string: <%s>",
1825 aFXSentrystr.Data()));
1826 if(aFXSarray) delete aFXSarray;
1829 const char* fileId = ((TObjString*) aFXSarray->At(0))->GetName();
1830 const char* hltSource = ((TObjString*) aFXSarray->At(1))->GetName();
1831 TString whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\";",
1832 GetCurrentRun(), fCurrentDetector.Data(), fileId, hltSource);
// TTimeStamp::GetSec() returns time_t, which does not match a %d conversion;
// pass it as long with %ld so the printf-style Form receives a matching argument.
1836 TString sqlQuery = Form("update %s set time_processed=%ld %s", fConfig->GetFXSdbTable(kHLT),
1837 static_cast<long>(now.GetSec()), whereClause.Data());
1839 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1842 TSQLResult* aResult;
1843 aResult = dynamic_cast<TSQLResult*> (fServer[kHLT]->Query(sqlQuery));
1845 Log(fCurrentDetector, Form("UpdateHLTTable - Can't execute SQL query <%s>", sqlQuery.Data()));
1854 //______________________________________________________________________________________________
// Updates the logbook_shuttle row of the current run through fServer[3].
// detector == "shuttle_done" sets shuttle_done=1 and sends a "SHUTTLE_status" /
// "Done" TMonaLisaText through fMonaLisa. For a detector name, the column of
// that name is set to `status`, which is accepted only if it contains "done" or
// "failed" (case-insensitive); anything else is logged as invalid.
// NOTE(review): the connection check, the else structure, the declarations of
// `setClause` and `mlList` and the returns fall in numbering gaps of this
// listing and are not visible.
1855 Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
1858 // Update Shuttle logbook filling detector or shuttle_done column
1859 // ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
1862 // check connection, in case connect
1864 Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook.");
1868 TString detName(detector);
1870 if(detName == "shuttle_done")
1872 setClause = "set shuttle_done=1";
1874 // Send the information to ML
1875 TMonaLisaText mlStatus("SHUTTLE_status", "Done");
1878 mlList.Add(&mlStatus);
1880 fMonaLisa->SendParameters(&mlList);
1882 TString statusStr(status);
1883 if(statusStr.Contains("done", TString::kIgnoreCase) ||
1884 statusStr.Contains("failed", TString::kIgnoreCase)){
// NOTE(review): detector is used as a column name and status as a quoted value, both unescaped.
1885 setClause = Form("set %s=\"%s\"", detector, status);
1888 Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s",
1894 TString whereClause = Form("where run=%d", GetCurrentRun());
1896 TString sqlQuery = Form("update logbook_shuttle %s %s",
1897 setClause.Data(), whereClause.Data());
1899 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1902 TSQLResult* aResult;
1903 aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
1905 Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data()));
1913 //______________________________________________________________________________________________
// Returns fLogbookEntry->GetRun(), or -1 when no logbook entry is set.
1914 Int_t AliShuttle::GetCurrentRun() const
1916 // Get current run from logbook entry
1918 return fLogbookEntry ? fLogbookEntry->GetRun() : -1;
1921 //______________________________________________________________________________________________
// Returns fLogbookEntry->GetStartTime(), or 0 when no logbook entry is set.
1922 UInt_t AliShuttle::GetCurrentStartTime() const
1924 // get current start time
1926 return fLogbookEntry ? fLogbookEntry->GetStartTime() : 0;
1929 //______________________________________________________________________________________________
// Returns fLogbookEntry->GetEndTime(), or 0 when no logbook entry is set.
1930 UInt_t AliShuttle::GetCurrentEndTime() const
1932 // get current end time from logbook entry
1934 return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
1937 //______________________________________________________________________________________________
// Emits one log line through AliInfo and appends the same line to a file in the
// Shuttle log directory, creating the directory if it cannot be opened.
// Line format: "<timestamp> (<pid>): <detector> - [run <n> - ]<message>"; the
// run part is added only when GetCurrentRun() >= 0.
// File name: <logdir>/<detector>_<run>.log when a run is set; the second Form
// (<logdir>/<detector>.log) presumably sits in an else branch that is not
// visible in this listing - confirm.
// NOTE(review): the condition on `dir`, the declarations of `fileName` and
// `logFile` and the early returns fall in numbering gaps of this listing.
1938 void AliShuttle::Log(const char* detector, const char* message)
1940 // Fill log string with a message
1942 void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
1944 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE)) {
1945 AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
1950 gSystem->FreeDirectory(dir);
1953 TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
1954 if (GetCurrentRun() >= 0)
1955 toLog += Form("run %d - ", GetCurrentRun());
// The message goes through "%s" so that '%' characters in it are not interpreted.
1956 toLog += Form("%s", message);
1958 AliInfo(toLog.Data());
1961 if (GetCurrentRun() >= 0)
1962 fileName.Form("%s/%s_%d.log", GetShuttleLogDir(), detector, GetCurrentRun());
1964 fileName.Form("%s/%s.log", GetShuttleLogDir(), detector);
1966 gSystem->ExpandPathName(fileName);
// Opened in append mode: earlier log lines are kept.
1969 logFile.open(fileName, ofstream::out | ofstream::app);
1971 if (!logFile.is_open()) {
1972 AliError(Form("Could not open file %s", fileName.Data()));
1976 logFile << toLog.Data() << "\n";
1981 //______________________________________________________________________________________________
// Entry point for processing runs listed in the Shuttle logbook.
// Steps visible here: log the request, SetLastAction("Starting"), query the
// logbook for rows with shuttle_done=0 (optionally restricted to `run`), set
// every fFirstUnprocessed flag to kTRUE, then query the runs earlier than `run`
// that are still not done and clear the flag of each detector found
// kUnprocessed there, and finally hand the selected entries to
// RetrieveConditionsData().
// NOTE(review): the tests on `run` that select between the alternative messages
// and queries, and all return statements, fall in numbering gaps of this
// listing and are not visible.
1982 Bool_t AliShuttle::Collect(Int_t run)
1985 // Collects conditions data for all UNPROCESSED run written to DAQ LogBook in case of run = -1 (default)
1986 // If a dedicated run is given this run is processed
1988 // In operational mode, this is the Shuttle function triggered by the EOR signal.
1992 Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs");
1994 Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run));
1996 SetLastAction("Starting");
1998 TString whereClause("where shuttle_done=0");
2000 whereClause += Form(" and run=%d", run);
2002 TObjArray shuttleLogbookEntries;
2003 if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries))
2005 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
2009 if (shuttleLogbookEntries.GetEntries() == 0)
2012 Log("SHUTTLE","Collect - Found no UNPROCESSED runs in Shuttle logbook");
2014 Log("SHUTTLE", Form("Collect - Run %d is already DONE "
2015 "or it does not exist in Shuttle logbook", run));
// Start from "first unprocessed" for every detector; the scan below may clear flags.
2019 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
2020 fFirstUnprocessed[iDet] = kTRUE;
2024 // query Shuttle logbook for earlier runs, check if some detectors are unprocessed,
2025 // flag them into fFirstUnprocessed array
2026 TString whereClause(Form("where shuttle_done=0 and run < %d", run));
2027 TObjArray tmpLogbookEntries;
2028 if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries))
2030 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
2034 TIter iter(&tmpLogbookEntries);
2035 AliShuttleLogbookEntry* anEntry = 0;
2036 while ((anEntry = dynamic_cast<AliShuttleLogbookEntry*> (iter.Next())))
2038 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
2040 if (anEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
2042 AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
2043 anEntry->GetRun(), GetDetName(iDet)));
2044 fFirstUnprocessed[iDet] = kFALSE;
2052 if (!RetrieveConditionsData(shuttleLogbookEntries))
2054 Log("SHUTTLE", "Collect - Process of at least one run failed");
2058 Log("SHUTTLE", "Collect - Requested run(s) successfully processed");
2062 //______________________________________________________________________________________________
// Calls Process() on every AliShuttleLogbookEntry in dateEntries and returns
// kTRUE only if hasError is still kFALSE at the end.
// NOTE(review): the statements executed when Process() fails fall in a numbering
// gap of this listing; presumably hasError is set there - confirm.
2063 Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries)
2065 // Retrieve conditions data for all runs that aren't processed yet
2067 Bool_t hasError = kFALSE;
2069 TIter iter(&dateEntries);
2070 AliShuttleLogbookEntry* anEntry;
// C-style cast: the array elements are taken to be AliShuttleLogbookEntry objects without a check.
2072 while ((anEntry = (AliShuttleLogbookEntry*) iter.Next())){
2073 if (!Process(anEntry)){
2078 return hasError == kFALSE;
2081 //______________________________________________________________________________________________
// Copies fLastActionTime into `tmp` while holding fMonitoringMutex.
// NOTE(review): the declaration of `tmp` and the return statement fall in
// numbering gaps of this listing; presumably `tmp` is returned - confirm.
2082 ULong_t AliShuttle::GetTimeOfLastAction() const
2086 fMonitoringMutex->Lock();
2088 tmp = fLastActionTime;
2090 fMonitoringMutex->UnLock();
2095 //______________________________________________________________________________________________
// Returns the last-action description by value; fMonitoringMutex is held
// between the Lock() and UnLock() calls below.
// NOTE(review): the statements between Lock() and UnLock() and the return fall
// in numbering gaps of this listing and are not visible.
2096 const TString AliShuttle::GetLastAction() const
2098 // returns a string description of the last action
2102 fMonitoringMutex->Lock();
2106 fMonitoringMutex->UnLock();
2111 //______________________________________________________________________________________________
// Stores `action` in fLastAction and the current time (time(0)) in
// fLastActionTime, both while holding fMonitoringMutex.
2112 void AliShuttle::SetLastAction(const char* action)
2114 // updates the monitoring variables
2116 fMonitoringMutex->Lock();
2118 fLastAction = action;
2119 fLastActionTime = time(0);
2121 fMonitoringMutex->UnLock();
2124 //______________________________________________________________________________________________
// Forwards to fLogbookEntry->GetRunParameter(param). When no logbook entry is
// set, an error is raised through AliError.
// NOTE(review): the value returned in the no-entry case falls in a numbering gap
// of this listing and is not visible.
2125 const char* AliShuttle::GetRunParameter(const char* param)
2127 // returns run parameter read from DAQ logbook
2129 if(!fLogbookEntry) {
2130 AliError("No logbook entry!");
2134 return fLogbookEntry->GetRunParameter(param);
2137 //______________________________________________________________________________________________
// Mails the responsibles of fCurrentDetector about a preprocessor error.
// The message body is written to <logdir>/mail.body: a fixed header, the last 10
// lines of the detector log file (appended by `tail`), and a fixed footer. The
// file is then fed to the `mail` command; recipients come from
// fConfig->GetResponsibles(fCurrentDetector) and a fixed cc address is added.
// NOTE(review): the declarations of `mailBody` and `to`, the early returns, three
// arguments of the mail command and the final return fall in numbering gaps of
// this listing and are not visible.
2138 Bool_t AliShuttle::SendMail()
2140 // sends a mail to the subdetector expert in case of preprocessor error
2142 void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
2145 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
2147 AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
2152 gSystem->FreeDirectory(dir);
2155 TString bodyFileName;
2156 bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
2157 gSystem->ExpandPathName(bodyFileName);
2160 mailBody.open(bodyFileName, ofstream::out);
2162 if (!mailBody.is_open())
2164 AliError(Form("Could not open mail body file %s", bodyFileName.Data()));
// Build the comma-separated recipient list.
2169 TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
2170 TObjString *anExpert=0;
2171 while ((anExpert = (TObjString*) iterExperts.Next()))
2173 to += Form("%s,", anExpert->GetName());
// Strip the trailing comma left by the loop above.
2175 to.Remove(to.Length()-1);
2176 AliDebug(2, Form("to: %s",to.Data()));
2178 // TODO this will be removed...
2179 if (to.Contains("not_yet_set")) {
2180 AliInfo("List of detector responsibles not yet set!");
2184 TString cc="alberto.colla@cern.ch";
2186 TString subject = Form("%s Shuttle preprocessor error in run %d !",
2187 fCurrentDetector.Data(), GetCurrentRun());
2188 AliDebug(2, Form("subject: %s", subject.Data()));
2190 TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
2191 body += Form("SHUTTLE just detected that your preprocessor "
2192 "exited with ERROR state in run %d!!\n\n", GetCurrentRun());
2193 body += Form("Please check %s status on the web page asap!\n\n", fCurrentDetector.Data());
// Form is printf-style: the %s below needs a matching argument, here the
// detector name as in the sentences above.
2194 body += Form("The last 10 lines of %s log file are following:\n\n", fCurrentDetector.Data());
2196 AliDebug(2, Form("Body begin: %s", body.Data()));
2198 mailBody << body.Data();
// Reopen in append mode so the footer lands after the lines added by `tail`.
2200 mailBody.open(bodyFileName, ofstream::out | ofstream::app);
// NOTE(review): Log() writes to "<detector>_<run>.log" when a run is set, whereas
// this reads "<detector>.log" - confirm which file is intended.
2202 TString logFileName = Form("%s/%s.log", GetShuttleLogDir(), fCurrentDetector.Data());
2203 TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
// A non-zero exit status of `tail` is reported in the mail as a missing log file.
2204 if (gSystem->Exec(tailCommand.Data()))
2206 mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
2209 TString endBody = Form("------------------------------------------------------\n\n");
2210 endBody += Form("In case of problems please contact the SHUTTLE core team.\n\n");
2211 endBody += "Please do not answer this message directly, it is automatically generated.\n\n";
2212 endBody += "Sincerely yours,\n\n \t\t\tthe SHUTTLE\n";
2214 AliDebug(2, Form("Body end: %s", endBody.Data()));
2216 mailBody << endBody.Data();
2221 TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
2225 bodyFileName.Data());
2226 AliDebug(2, Form("mail command: %s", mailCommand.Data()));
// TSystem::Exec returns the command's exit status (0 on success).
2228 Bool_t result = gSystem->Exec(mailCommand.Data());
2233 //______________________________________________________________________________________________
// Stores the shell-expanded form of tmpDir in fgkShuttleTempDir.
// NOTE(review): TSystem::ExpandPathName(const char*) returns a newly allocated
// string that the caller must delete; if fgkShuttleTempDir is a TString (its
// declaration is not visible here) the returned buffer is leaked - confirm.
2234 void AliShuttle::SetShuttleTempDir(const char* tmpDir)
2236 // sets Shuttle temp directory
2238 fgkShuttleTempDir = gSystem->ExpandPathName(tmpDir);
2241 //______________________________________________________________________________________________
// Stores the shell-expanded form of logDir in fgkShuttleLogDir.
// NOTE(review): TSystem::ExpandPathName(const char*) returns a newly allocated
// string that the caller must delete; if fgkShuttleLogDir is a TString (its
// declaration is not visible here) the returned buffer is leaked - confirm.
2242 void AliShuttle::SetShuttleLogDir(const char* logDir)
2244 // sets Shuttle log directory
2246 fgkShuttleLogDir = gSystem->ExpandPathName(logDir);