1 /**************************************************************************
2 * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
4 * Author: The ALICE Off-line Project. *
5 * Contributors are mentioned in the code where appropriate. *
7 * Permission to use, copy, modify and distribute this software and its *
8 * documentation strictly for non-commercial purposes is hereby granted *
9 * without fee, provided that the above copyright notice appears in all *
10 * copies and that both the copyright notice and this permission notice *
11 * appear in the supporting documentation. The authors make no claims *
12 * about the suitability of this software for any purpose. It is *
13 * provided "as is" without express or implied warranty. *
14 **************************************************************************/
18 Revision 1.21 2006/12/07 08:51:26 jgrosseo
20 table, db names in ldap configuration
21 added GRP preprocessor
22 DCS data can also be retrieved by data point
24 Revision 1.20 2006/11/16 16:16:48 jgrosseo
25 introducing strict run ordering flag
26 removed giving preprocessor name to preprocessor, they have to know their name themselves ;-)
28 Revision 1.19 2006/11/06 14:23:04 jgrosseo
29 major update (Alberto)
30 o) reading of run parameters from the logbook
31 o) online offline naming conversion
32 o) standalone DCSclient package
34 Revision 1.18 2006/10/20 15:22:59 jgrosseo
35 o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
36 o) Merging Collect, CollectAll, CollectNew function
37 o) Removing implementation of empty copy constructors (declaration still there!)
39 Revision 1.17 2006/10/05 16:20:55 jgrosseo
40 adapting to new CDB classes
42 Revision 1.16 2006/10/05 15:46:26 jgrosseo
43 applying to the new interface
45 Revision 1.15 2006/10/02 16:38:39 jgrosseo
48 storing of objects that failed to be stored to the grid before
49 interfacing of shuttle status table in daq system
51 Revision 1.14 2006/08/29 09:16:05 jgrosseo
54 Revision 1.13 2006/08/15 10:50:00 jgrosseo
55 effc++ corrections (alberto)
57 Revision 1.12 2006/08/08 14:19:29 jgrosseo
58 Update to shuttle classes (Alberto)
60 - Possibility to set the full object's path in the Preprocessor's and
61 Shuttle's Store functions
62 - Possibility to extend the object's run validity in the same classes
63 ("startValidity" and "validityInfinite" parameters)
64 - Implementation of the StoreReferenceData function to store reference
65 data in a dedicated CDB storage.
67 Revision 1.11 2006/07/21 07:37:20 jgrosseo
68 last run is stored after each run
70 Revision 1.10 2006/07/20 09:54:40 jgrosseo
71 introducing status management: The processing per subdetector is divided into several steps,
72 after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
73 can keep track of the number of failures and skips further processing after a certain threshold is
74 exceeded. These thresholds can be configured in LDAP.
76 Revision 1.9 2006/07/19 10:09:55 jgrosseo
77 new configuration, accesst to DAQ FES (Alberto)
79 Revision 1.8 2006/07/11 12:44:36 jgrosseo
80 adding parameters for extended validity range of data produced by preprocessor
82 Revision 1.7 2006/07/10 14:37:09 jgrosseo
83 small fix + todo comment
85 Revision 1.6 2006/07/10 13:01:41 jgrosseo
86 enhanced storing of last sucessfully processed run (alberto)
88 Revision 1.5 2006/07/04 14:59:57 jgrosseo
89 revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
91 Revision 1.4 2006/06/12 09:11:16 jgrosseo
92 coding conventions (Alberto)
94 Revision 1.3 2006/06/06 14:26:40 jgrosseo
95 o) removed files that were moved to STEER
96 o) shuttle updated to follow the new interface (Alberto)
98 Revision 1.2 2006/03/07 07:52:34 hristov
99 New version (B.Yordanov)
101 Revision 1.6 2005/11/19 17:19:14 byordano
102 RetrieveDATEEntries and RetrieveConditionsData added
104 Revision 1.5 2005/11/19 11:09:27 byordano
105 AliShuttle declaration added
107 Revision 1.4 2005/11/17 17:47:34 byordano
108 TList changed to TObjArray
110 Revision 1.3 2005/11/17 14:43:23 byordano
113 Revision 1.1.1.1 2005/10/28 07:33:58 hristov
114 Initial import as subdirectory in AliRoot
116 Revision 1.2 2005/09/13 08:41:15 byordano
117 default startTime endTime added
119 Revision 1.4 2005/08/30 09:13:02 byordano
122 Revision 1.3 2005/08/29 21:15:47 byordano
128 // This class is the main manager for AliShuttle.
129 // It organizes the data retrieval from DCS and calls the
130 // interface methods of AliPreprocessor.
131 // For every detector in AliShuttleConfig (see AliShuttleConfig),
132 // data for its set of aliases is retrieved. If there is a registered
133 // AliPreprocessor for this detector then it will be used
134 // according to the schema (see AliPreprocessor).
135 // If there isn't a registered AliPreprocessor then the retrieved
136 // data is stored automatically to the underlying AliCDBStorage.
137 // The alias name is used for detSpec.
140 #include "AliShuttle.h"
142 #include "AliCDBManager.h"
143 #include "AliCDBStorage.h"
144 #include "AliCDBId.h"
145 #include "AliCDBRunRange.h"
146 #include "AliCDBPath.h"
147 #include "AliCDBEntry.h"
148 #include "AliShuttleConfig.h"
149 #include "DCSClient/AliDCSClient.h"
151 #include "AliPreprocessor.h"
152 #include "AliShuttleStatus.h"
153 #include "AliShuttleLogbookEntry.h"
158 #include <TTimeStamp.h>
159 #include <TObjString.h>
160 #include <TSQLServer.h>
161 #include <TSQLResult.h>
167 #include <sys/types.h>
168 #include <sys/wait.h>
// URIs of the main (alien://) and local (local://) storages used for CDB
// objects and for reference data.
172 TString AliShuttle::fgkMainCDB("alien://folder=ShuttleCDB");
173 TString AliShuttle::fgkLocalCDB("local://LocalShuttleCDB");
174 TString AliShuttle::fgkMainRefStorage("alien://folder=ShuttleReference");
175 TString AliShuttle::fgkLocalRefStorage("local://LocalReferenceStorage");
// Flag initialised to kTRUE; presumably the switch that allows skipping the
// DCS archive DB query in ProcessCurrentDetector -- TODO confirm.
177 Bool_t AliShuttle::fgkProcessDCS(kTRUE);
// Temp and log directories, expanded from $ALICE_ROOT when these statics are
// initialised.
// NOTE(review): gSystem is dereferenced during static initialisation; confirm
// it is guaranteed to be set up before this translation unit's statics.
179 const char* AliShuttle::fgkShuttleTempDir = gSystem->ExpandPathName("$ALICE_ROOT/SHUTTLE/temp");
180 const char* AliShuttle::fgkShuttleLogDir = gSystem->ExpandPathName("$ALICE_ROOT/SHUTTLE/log");
182 //______________________________________________________________________________________________
// Constructor: records the timeout/retry settings, checks the configuration
// and prepares the containers and flags used during processing.
// NOTE(review): most of the member-initializer list, the braces and a few
// short statements are not visible in this listing.
183 AliShuttle::AliShuttle(const AliShuttleConfig* config,
184 UInt_t timeout, Int_t retries):
186 fTimeout(timeout), fRetries(retries),
197 // config: AliShuttleConfig used
198 // timeout: timeout used for AliDCSClient connection
199 // retries: the number of retries in case of connection error.
// An invalid configuration is reported through AliFatal.
202 if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
// Loop over the four system indices 0..3.
203 for(int iSys=0;iSys<4;iSys++) {
// Mark the FXS list as owner of its entries.
// NOTE(review): any guard restricting which indices reach this line is not visible here.
206 fFXSlist[iSys].SetOwner(kTRUE);
// The preprocessor map is marked as owner as well.
208 fPreprocessorMap.SetOwner(kTRUE);
// Reset the per-detector "first unprocessed run" flags.
210 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
211 fFirstUnprocessed[iDet] = kFALSE;
// Mutex allocated here and released in the destructor.
213 fMonitoringMutex = new TMutex();
216 //______________________________________________________________________________________________
// Destructor: empties the preprocessor map, closes and deletes the server
// connections and releases the monitoring mutex.
// NOTE(review): braces and guard lines between the statements below are not
// visible in this listing.
217 AliShuttle::~AliShuttle()
// Delete all keys and values held by the preprocessor map.
221 fPreprocessorMap.DeleteAll();
// Same four system indices as in the constructor.
222 for(int iSys=0;iSys<4;iSys++)
// NOTE(review): confirm a null check on fServer[iSys] precedes these two calls.
224 fServer[iSys]->Close();
225 delete fServer[iSys];
234 if (fMonitoringMutex)
236 delete fMonitoringMutex;
237 fMonitoringMutex = 0;
241 //______________________________________________________________________________________________
242 void AliShuttle::RegisterPreprocessor(AliPreprocessor* preprocessor)
245 // Registers new AliPreprocessor.
246 // It uses GetName() for indentificator of the pre processor.
247 // The pre processor is registered it there isn't any other
248 // with the same identificator (GetName()).
251 const char* detName = preprocessor->GetName();
252 if(GetDetPos(detName) < 0)
253 AliFatal(Form("********** !!!!! Invalid detector name: %s !!!!! **********", detName));
255 if (fPreprocessorMap.GetValue(detName)) {
256 AliWarning(Form("AliPreprocessor %s is already registered!", detName));
260 fPreprocessorMap.Add(new TObjString(detName), preprocessor);
262 //______________________________________________________________________________________________
263 UInt_t AliShuttle::Store(const AliCDBPath& path, TObject* object,
264 AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite)
266 // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for
267 // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored
268 // using this function. Use StoreReferenceData instead!
269 // It calls WriteToCDB function which perform actual storage
271 return WriteToCDB(fgkMainCDB, fgkLocalCDB, path, object,
272 metaData, validityStart, validityInfinite);
276 //______________________________________________________________________________________________
277 UInt_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData)
279 // Stores a CDB object in the storage for reference data. This objects will not be available during
280 // offline reconstrunction. Use this function for reference data only!
281 // It calls WriteToCDB function which perform actual storage
283 return WriteToCDB(fgkMainRefStorage, fgkLocalRefStorage, path, object, metaData);
287 //______________________________________________________________________________________________
// Writes `object` under `path`: the main storage is tried first and the local
// backup storage is used as fallback (see the log message further down).
// NOTE(review): several short lines (braces, else branches, the declarations of
// lastRun and result, the final return) are not visible in this listing.
288 UInt_t AliShuttle::WriteToCDB(const char* mainUri, const char* localUri,
289 const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
290 Int_t validityStart, Bool_t validityInfinite)
292 // write object into the CDB. Parameters are passed by Store and StoreReferenceData functions.
293 // The parameters are:
294 // 1) Uri of the main storage (Grid)
295 // 2) Uri of the backup storage (Local)
296 // 3) the object's path.
297 // 4) the object to be stored
298 // 5) the metaData to be associated with the object
299 // 6) the validity start run number w.r.t. the current run,
300 // if the data is valid only for this run leave the default 0
301 // 7) specifies if the calibration data is valid for infinity (this means until updated),
302 // typical for calibration runs, the default is kFALSE
305 // 1 if stored in main (Grid) storage
306 // 2 if stored in backup (Local) storage
// Label used in the log messages: "CDB" when mainUri equals fgkMainCDB, "Reference" otherwise.
308 const char* cdbType = (mainUri == fgkMainCDB) ? "CDB" : "Reference";
// validityStart is subtracted from the current run number to get the first valid run.
310 Int_t firstRun = GetCurrentRun() - validityStart;
312 AliError("First valid run happens to be less than 0! Setting it to 0.");
// The last valid run is either infinity or the current run.
317 if(validityInfinite) {
318 lastRun = AliCDBRunRange::Infinity();
320 lastRun = GetCurrentRun();
// The last two constructor arguments are passed as -1.
323 AliCDBId id(path, firstRun, lastRun, -1, -1);
// Add a "RunUsed(TObjString)" property holding the current run number unless
// the metadata already carries one of type TObjString.
// NOTE(review): metaData is dereferenced here; confirm callers never pass null.
325 if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){
326 TObjString runUsed = Form("%d", GetCurrentRun());
327 metaData->SetProperty("RunUsed(TObjString)", runUsed.Clone());
332 if (!(AliCDBManager::Instance()->GetStorage(mainUri))) {
333 AliError(Form("WriteToCDB - Cannot activate main %s storage", cdbType));
// NOTE(review): the main storage is looked up a second time for the Put.
335 result = (UInt_t) AliCDBManager::Instance()->GetStorage(mainUri)
336 ->Put(object, id, metaData);
341 Log(fCurrentDetector,
342 Form("WriteToCDB - Problem with main %s storage. Putting <%s> into backup storage",
343 cdbType, path.GetPath().Data()));
345 // Set Grid version to current run number, to ease retrieval later
346 id.SetVersion(GetCurrentRun());
// NOTE(review): the pointer returned by GetStorage(localUri) is dereferenced
// without a null check.
348 result = AliCDBManager::Instance()->GetStorage(localUri)
349 ->Put(object, id, metaData);
355 Log(fCurrentDetector, "WriteToCDB - Can't store data!");
363 //______________________________________________________________________________________________
// Fetches the entry /SHUTTLE/STATUS/<fCurrentDetector> for the current run into
// fStatusEntry and casts the object it holds to AliShuttleStatus.
// Returns 0 when no entry is found.
// NOTE(review): braces, the final return and the lines preceding the Get()
// call are not visible in this listing.
364 AliShuttleStatus* AliShuttle::ReadShuttleStatus()
366 // Reads the AliShuttleStatus from the CDB
// NOTE(review): the storage pointer returned by GetStorage() is dereferenced
// without a null check.
373 fStatusEntry = AliCDBManager::Instance()->GetStorage(AliShuttle::GetLocalCDB())
374 ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
376 if (!fStatusEntry) return 0;
377 fStatusEntry->SetOwner(1);
// dynamic_cast yields 0 if the stored object is not an AliShuttleStatus.
379 AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
// Presumably reached only when the cast above failed -- the guard is not visible here.
381 AliError("Invalid object stored to CDB!");
388 //______________________________________________________________________________________________
// Wraps `status` in a new AliCDBEntry whose run range is [current run, current run],
// keeps that entry in fStatusEntry and puts it into the fgkLocalCDB storage.
// NOTE(review): braces, the return statements and the lines preceding the run
// lookup are not visible in this listing.
389 Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status)
391 // writes the status for one subdetector
398 Int_t run = GetCurrentRun();
// Path SHUTTLE/STATUS/<fCurrentDetector>, first run == last run == current run.
400 AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
// A fresh, empty AliCDBMetaData is attached to the entry.
402 fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
403 fStatusEntry->SetOwner(1);
// NOTE(review): the storage pointer returned by GetStorage() is dereferenced
// without a null check.
405 UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
// Presumably reached only when result is zero -- the guard is not visible here.
408 AliError(Form("WriteShuttleStatus for %s, run %d failed", fCurrentDetector.Data(), run));
415 //______________________________________________________________________________________________
416 void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount)
418 // changes the AliShuttleStatus for the given detector and run to the given status
421 AliError("UNEXPECTED: fStatusEntry empty");
425 AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
428 AliError("UNEXPECTED: status could not be read from current CDB entry");
432 TString actionStr = Form("UpdateShuttleStatus - %s: Changing state from %s to %s",
433 fCurrentDetector.Data(),
434 status->GetStatusName(),
435 status->GetStatusName(newStatus));
436 Log("SHUTTLE", actionStr);
437 SetLastAction(actionStr);
439 status->SetStatus(newStatus);
440 if (increaseCount) status->IncreaseCount();
442 AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
444 //______________________________________________________________________________________________
445 Bool_t AliShuttle::ContinueProcessing()
447 // this function reads the AliShuttleStatus information from CDB and
448 // checks if the processing should be continued
449 // if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
451 if (!fConfig->HostProcessDetector(fCurrentDetector)) return kFALSE;
453 AliPreprocessor* aPreprocessor =
454 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
457 AliInfo(Form("%s: no preprocessor registered", fCurrentDetector.Data()));
461 AliShuttleLogbookEntry::Status entryStatus =
462 fLogbookEntry->GetDetectorStatus(fCurrentDetector);
464 if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
465 AliInfo(Form("ContinueProcessing - %s is %s",
466 fCurrentDetector.Data(),
467 fLogbookEntry->GetDetectorStatusName(entryStatus)));
471 // if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state
473 // check if current run is first unprocessed run for current detector
474 if (fConfig->StrictRunOrder(fCurrentDetector) &&
475 !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
477 Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering but this is not the first unprocessed run!"));
481 AliShuttleStatus* status = ReadShuttleStatus();
484 Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
485 fCurrentDetector.Data()));
486 status = new AliShuttleStatus(AliShuttleStatus::kStarted);
487 return WriteShuttleStatus(status);
490 // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
491 // If it happens it may mean Logbook updating failed... let's do it now!
492 if (status->GetStatus() == AliShuttleStatus::kDone ||
493 status->GetStatus() == AliShuttleStatus::kFailed){
494 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
495 fCurrentDetector.Data(),
496 status->GetStatusName(status->GetStatus())));
497 UpdateShuttleLogbook(fCurrentDetector.Data(),
498 status->GetStatusName(status->GetStatus()));
502 if (status->GetStatus() == AliShuttleStatus::kStoreFailed) {
504 Form("ContinueProcessing - %s: Grid storage of one or more objects failed. Trying again now",
505 fCurrentDetector.Data()));
506 if(TryToStoreAgain()){
507 Log(fCurrentDetector.Data(), "ContinueProcessing - All objects successfully stored into OCDB");
508 UpdateShuttleStatus(AliShuttleStatus::kDone);
509 UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE");
512 Form("ContinueProcessing - %s: Grid storage failed again",
513 fCurrentDetector.Data()));
518 // if we get here, there is a restart
519 Bool_t cont = kFALSE;
522 if (status->GetCount() >= fConfig->GetMaxRetries()) {
523 Log("SHUTTLE", Form("ContinueProcessing - %s failed %d times in status %s - "
524 "Updating Shuttle Logbook", fCurrentDetector.Data(),
525 status->GetCount(), status->GetStatusName()));
526 UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
528 Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. "
529 "Aborted before with %s. Retry number %d.", fCurrentDetector.Data(),
530 status->GetStatusName(), status->GetCount()));
531 UpdateShuttleStatus(AliShuttleStatus::kStarted, kTRUE);
535 // Send mail to detector expert!
536 AliInfo(Form("Sending mail to %s expert...", fCurrentDetector.Data()));
538 Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert",
539 fCurrentDetector.Data()));
544 //______________________________________________________________________________________________
// Processes one run: for every configured detector that ContinueProcessing()
// accepts, the work is done in a client process while the parent process
// monitors it against the configured time-out. Afterwards the Shuttle logbook
// is re-read to update the run's "done" state and the first-unprocessed flags.
// NOTE(review): many short lines (braces, the fork call, kill/return
// statements, several Form() arguments) are not visible in this listing.
545 Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
548 // Makes data retrieval for all detectors in the configuration.
549 // entry: Shuttle logbook entry, contains run parameters and status of detectors
550 // (Unprocessed, Inactive, Failed or Done).
551 // Returns kFALSE in case an error occurred and kTRUE otherwise
554 if(!entry) return kFALSE;
556 fLogbookEntry = entry;
// Run already flagged as done: only the logbook is updated.
558 if(fLogbookEntry->IsDone()){
559 Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook");
560 UpdateShuttleLogbook("shuttle_done");
566 AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^* \n",
569 fLogbookEntry->Print("all");
572 Bool_t hasError = kFALSE;
// Reset the "FXS was called" flags for the three file exchange servers.
573 for(Int_t iSys=0;iSys<3;iSys++) fFXSCalled[iSys]=kFALSE;
// Query both main storages for the current run, when they can be activated.
575 AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
576 if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
577 AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
578 if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
580 // Loop on detectors in the configuration
581 TIter iter(fConfig->GetDetectors());
582 TObjString* aDetector = 0;
584 while ((aDetector = (TObjString*) iter.Next()))
586 fCurrentDetector = aDetector->String();
// Detectors rejected by ContinueProcessing() are skipped.
588 if (ContinueProcessing() == kFALSE) continue;
590 AliInfo(Form("\n\n \t\t\t****** run %d - %s: START ******",
591 GetCurrentRun(), aDetector->GetName()));
598 Log("SHUTTLE", "ERROR: Forking failed");
// Parent side: poll the client once per second until it terminates.
603 AliInfo(Form("In parent process of %d - %s: Starting monitoring",
604 GetCurrentRun(), aDetector->GetName()));
606 Long_t begin = time(0);
608 int status; // to be used with waitpid, on purpose an int (not Int_t)!
// WNOHANG: waitpid returns 0 while the client is still running.
609 while (waitpid(pid, &status, WNOHANG) == 0)
611 Long_t expiredTime = time(0) - begin;
// Elapsed time is compared against the preprocessor time-out from the configuration.
613 if (expiredTime > fConfig->GetPPTimeOut())
// NOTE(review): if the (not visible) argument is the Long_t expiredTime, "%d" does not match its type.
615 Log("SHUTTLE", Form("Process time out. Run time: %d seconds. Killing...",
622 gSystem->Sleep(1000);
// Progress message once per minute of elapsed time.
626 if (expiredTime % 60 == 0)
627 Log("SHUTTLE", Form("Checked process. Run time: %d seconds.",
629 gSystem->Sleep(1000);
633 AliInfo(Form("In parent process of %d - %s: Client has terminated.",
634 GetCurrentRun(), aDetector->GetName()));
// Only a normally exited client provides an exit code.
636 if (WIFEXITED(status))
638 Int_t returnCode = WEXITSTATUS(status);
640 Log("SHUTTLE", Form("The return code is %d", returnCode));
// Client side: process the current detector and exit with a return code.
649 AliInfo(Form("In client process of %d - %s", GetCurrentRun(), aDetector->GetName()));
651 UInt_t result = ProcessCurrentDetector();
653 Int_t returnCode = 0; // will be set to 1 in case of an error
658 AliInfo(Form("\n \t\t\t****** run %d - %s: PREPROCESSOR ERROR ****** \n\n",
659 GetCurrentRun(), aDetector->GetName()));
661 else if (result == 2)
663 AliInfo(Form("\n \t\t\t****** run %d - %s: STORAGE ERROR ****** \n\n",
664 GetCurrentRun(), aDetector->GetName()));
667 AliInfo(Form("\n \t\t\t****** run %d - %s: DONE ****** \n\n",
668 GetCurrentRun(), aDetector->GetName()));
673 // Process successful: Update time_processed field in FXS logbooks!
674 if (fFXSCalled[kDAQ])
676 if (UpdateDAQTable() == kFALSE)
678 fFXSlist[kDAQ].Clear();
680 //if(fFXSCalled[kDCS]) {
681 // if (UpdateDCSTable(aDetector->GetName()) == kFALSE)
683 // fFXSlist[kDCS].Clear();
685 if (fFXSCalled[kHLT])
687 if (UpdateHLTTable() == kFALSE)
689 fFXSlist[kHLT].Clear();
693 AliInfo(Form("Client process of %d - %s is exiting now with %d.",
694 GetCurrentRun(), aDetector->GetName(), returnCode));
696 // the client exits here
697 gSystem->Exit(returnCode);
699 AliError("We should never get here!!!");
703 AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^* \n",
706 //check if shuttle is done for this run, if so update logbook
707 TObjArray checkEntryArray;
708 checkEntryArray.SetOwner(1);
709 TString whereClause = Form("where run=%d", GetCurrentRun());
// A failed or empty logbook query only produces a warning; the result still reflects hasError.
710 if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || checkEntryArray.GetEntries() == 0) {
711 Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!",
713 return hasError == kFALSE;
716 AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*>
717 (checkEntryArray.At(0));
// NOTE(review): confirm a null check on checkEntry precedes the uses below.
721 if (checkEntry->IsDone())
723 Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook");
724 UpdateShuttleLogbook("shuttle_done");
// Detectors still unprocessed for this run get their fFirstUnprocessed flag cleared.
728 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
730 if (checkEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
732 AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
733 checkEntry->GetRun(), GetDetName(iDet)));
734 fFirstUnprocessed[iDet] = kFALSE;
742 return hasError == kFALSE;
745 //______________________________________________________________________________________________
// Retrieves the DCS values (aliases and data points) configured for
// fCurrentDetector, hands them to the registered preprocessor and updates the
// Shuttle status according to the outcome.
// NOTE(review): braces, the declarations of dcsMap and aDP, the DCS skip
// condition and the return statements are not visible in this listing.
746 UInt_t AliShuttle::ProcessCurrentDetector()
749 // Makes data retrieval just for a specific detector (fCurrentDetector).
750 // There should be a configuration for this detector.
752 AliInfo(Form("Retrieving values for %s, run %d", fCurrentDetector.Data(), GetCurrentRun()));
754 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
759 Bool_t aDCSError = kFALSE;
762 // TODO Test only... I've added a flag that allows to
763 // exclude DCS archive DB query
766 AliInfo("Skipping DCS processing!");
769 TString host(fConfig->GetDCSHost(fCurrentDetector));
770 Int_t port = fConfig->GetDCSPort(fCurrentDetector);
772 // Retrieval of Aliases
773 TObjString* anAlias = 0;
774 TIter iterAliases(fConfig->GetDCSAliases(fCurrentDetector));
775 while ((anAlias = (TObjString*) iterAliases.Next()))
// One owning array of values per alias.
777 TObjArray *valueSet = new TObjArray();
778 valueSet->SetOwner(1);
780 AliInfo("Querying DCS archive DB (Aliases)...");
// GetValueSet() returning 0 is treated as a DCS error.
781 aDCSError = (GetValueSet(host, port, anAlias->String(), valueSet, kAlias) == 0);
// The map is keyed by a clone of the alias name.
785 dcsMap.Add(anAlias->Clone(), valueSet);
787 Log(fCurrentDetector,
788 Form("ProcessCurrentDetector - Error while retrieving alias %s",
789 anAlias->GetName()));
790 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
796 // Retrieval of Data Points
798 TIter iterDP(fConfig->GetDCSDataPoints(fCurrentDetector));
799 while ((aDP = (TObjString*) iterDP.Next()))
801 TObjArray *valueSet = new TObjArray();
802 valueSet->SetOwner(1);
803 AliInfo("Querying DCS archive DB (Data Points)...");
804 aDCSError = (GetValueSet(host, port, aDP->String(), valueSet, kDP) == 0);
808 dcsMap.Add(aDP->Clone(), valueSet);
810 Log(fCurrentDetector,
811 Form("ProcessCurrentDetector - Error while retrieving data point %s",
813 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
820 // DCS Archive DB processing successful. Call Preprocessor!
821 UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
// NOTE(review): the result of the cast is used without a null check in the visible lines.
823 AliPreprocessor* aPreprocessor =
824 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
826 aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
827 UInt_t aPPResult = aPreprocessor->Process(&dcsMap);
// Three outcomes: preprocessor error, full success, or success with a Grid storage error.
829 UInt_t returnValue = 0;
830 if (aPPResult == 0) { // Preprocessor error
831 UpdateShuttleStatus(AliShuttleStatus::kPPError);
833 } else if (fGridError == kFALSE) { // process and Grid storage ok!
834 UpdateShuttleStatus(AliShuttleStatus::kDone);
835 UpdateShuttleLogbook(fCurrentDetector, "DONE");
836 Log(fCurrentDetector.Data(),
837 "ProcessCurrentDetector - Preprocessor and Grid storage ended successfully");
839 } else { // Grid storage error (process ok, but object put in local storage)
840 UpdateShuttleStatus(AliShuttleStatus::kStoreFailed);
849 //______________________________________________________________________________________________
// Runs "select * from logbook_shuttle <whereClause> order by run" on server
// slot 3 and appends one AliShuttleLogbookEntry per returned row to `entries`.
// NOTE(review): the second parameter line, braces, return statements and the
// clean-up of the SQL result/rows are not visible in this listing.
// NOTE(review): whereClause is inserted verbatim into the SQL text; callers
// must only pass trusted clauses.
850 Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
853 // Query DAQ's Shuttle logbook and fills detector status object.
854 // Call QueryRunParameters to query DAQ logbook for run parameters.
858 // check connection, in case connect
859 if(!Connect(3)) return kFALSE;
862 sqlQuery = Form("select * from logbook_shuttle %s order by run", whereClause);
864 TSQLResult* aResult = fServer[3]->Query(sqlQuery);
866 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
870 AliDebug(2,Form("Query = %s", sqlQuery.Data()));
872 if(aResult->GetRowCount() == 0) {
873 // if(sqlQuery.EndsWith("where shuttle_done=0 order by run")){
874 // Log("SHUTTLE", "QueryShuttleLogbook - All runs in Shuttle Logbook are already DONE");
878 AliInfo("No entries in Shuttle Logbook match request");
884 // TODO Check field count!
// The result is required to have exactly this many columns.
885 const UInt_t nCols = 22;
886 if (aResult->GetFieldCount() != (Int_t) nCols) {
887 AliError("Invalid SQL result field number!");
893 while ((aRow = aResult->Next())) {
// Column 0 holds the run number.
894 TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
895 Int_t run = runString.Atoi();
// The run parameters come from a separate query; the entry is created there.
897 AliShuttleLogbookEntry *entry = QueryRunParameters(run);
// Every column is passed to SetDetectorStatus() by field name.
902 for(UInt_t ii = 0; ii < nCols; ii++)
903 entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii));
905 entries.AddLast(entry);
909 // if(sqlQuery.EndsWith("where shuttle_done=0 order by run"))
910 // Log("SHUTTLE", Form("QueryShuttleLogbook - Found %d unprocessed runs in Shuttle Logbook",
911 // entries.GetEntriesFast()));
916 //______________________________________________________________________________________________
// Queries the DAQ logbook table for `run` and builds an AliShuttleLogbookEntry
// from the single matching row, passing every column to SetRunParameter().
// NOTE(review): the connection check, braces, return statements and the
// clean-up of the SQL result/row are not visible in this listing.
917 AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
920 // Retrieve run parameters written in the DAQ logbook and sets them into AliShuttleLogbookEntry object
923 // check connection, in case connect
// The table name comes from the configuration.
928 sqlQuery.Form("select * from %s where run=%d", fConfig->GetDAQlbTable(), run);
930 TSQLResult* aResult = fServer[3]->Query(sqlQuery);
932 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
// Exactly one row is expected for a run.
936 if (aResult->GetRowCount() == 0) {
937 Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run));
942 if (aResult->GetRowCount() > 1) {
943 AliError(Form("More than one entry in DAQ Logbook for run %d. Skipping", run));
948 TSQLRow* aRow = aResult->Next();
951 AliError(Form("Could not retrieve row for run %d. Skipping", run));
956 AliShuttleLogbookEntry* entry = new AliShuttleLogbookEntry(run);
958 for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
959 entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
961 UInt_t startTime = entry->GetStartTime();
962 UInt_t endTime = entry->GetEndTime();
// Zero or inverted start/end times are treated as invalid parameters.
964 if (!startTime || !endTime || startTime > endTime) {
// NOTE(review): startTime/endTime are UInt_t but are formatted with "%d".
966 Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d",
967 run, startTime, endTime));
980 //______________________________________________________________________________________________
981 Bool_t AliShuttle::TryToStoreAgain()
983 // Called in case the detector failed to store the object in Grid OCDB
984 // It tries to store the object again, if it does not find more recent and overlapping objects
985 // Calls underlying TryToStoreAgain(const char*) function twice, for OCDB and Reference storage.
987 AliInfo("Trying to store OCDB data again...");
988 Bool_t resultCDB = TryToStoreAgain(fgkMainCDB);
990 AliInfo("Trying to store reference data again...");
991 Bool_t resultRef = TryToStoreAgain(fgkMainRefStorage);
993 return resultCDB && resultRef;
996 //______________________________________________________________________________________________
// Retries the Grid storage of objects that were previously written to the
// backup storage matching gridURI. Each local entry of the current detector is
// compared with the Grid id list; depending on the outcome it is put into the
// Grid storage and/or its local file is removed.
// NOTE(review): braces, the assignments to `type`, the declarations of
// backupURI/filename, parts of the comparison logic and the return statements
// are not visible in this listing.
997 Bool_t AliShuttle::TryToStoreAgain(TString& gridURI)
999 // Called by TryToStoreAgain(), performs actual storage retry
1001 TObjArray* gridIds=0;
1003 Bool_t result = kTRUE;
1005 const char* type = 0;
// Select the backup storage that belongs to the given main storage.
1007 if(gridURI == fgkMainCDB) {
1009 backupURI = fgkLocalCDB;
1010 } else if(gridURI == fgkMainRefStorage) {
1012 backupURI = fgkLocalRefStorage;
1014 AliError(Form("Invalid storage URI: %s", gridURI.Data()));
1018 AliCDBManager* man = AliCDBManager::Instance();
1020 AliCDBStorage *gridSto = man->GetStorage(gridURI);
1022 Log(fCurrentDetector.Data(),
1023 Form("TryToStoreAgain - cannot activate main %s storage", type));
// Id list obtained from the Grid storage.
1027 gridIds = gridSto->GetQueryCDBList();
1029 // get objects previously stored in local CDB
// NOTE(review): backupSto and localEntries are used below without null checks.
1030 AliCDBStorage *backupSto = man->GetStorage(backupURI);
1031 AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*");
1032 // Local objects were stored with current run as Grid version!
1033 TList* localEntries = backupSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun());
1034 localEntries->SetOwner(1);
1036 // loop on local stored objects
1037 TIter localIter(localEntries);
1038 AliCDBEntry *aLocEntry = 0;
1039 while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){
1040 aLocEntry->SetOwner(1);
// aLocId is a copy taken before version/subversion are reset to -1 on the entry.
1041 AliCDBId aLocId = aLocEntry->GetId();
1042 aLocEntry->SetVersion(-1);
1043 aLocEntry->SetSubVersion(-1);
1045 // loop on Grid valid Id's
1046 Bool_t store = kTRUE;
1047 TIter gridIter(gridIds);
1048 AliCDBId* aGridId = 0;
1049 while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
1050 // If local object is valid up to infinity we store it only if it is
1051 // the first unprocessed run!
1052 if (aLocId.GetLastRun() == AliCDBRunRange::Infinity())
1054 if (!fFirstUnprocessed[GetDetPos(fCurrentDetector)])
1056 Log(fCurrentDetector.Data(),
1057 ("TryToStoreAgain - This object has validity infinite but "
1058 "there are previous unprocessed runs!"));
// Only Grid ids with the same path as the local object are relevant.
1064 if(aGridId->GetPath() != aLocId.GetPath()) continue;
1065 // skip all objects valid up to infinity
1066 if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
1067 // if we get here, it means there's already some more recent object stored on Grid!
1073 Log(fCurrentDetector.Data(),
1074 Form("TryToStoreAgain - A more recent object already exists in %s storage: <%s>",
1075 type, aGridId->ToString().Data()));
1076 // removing local filename...
1077 // TODO maybe it's better not to remove it, it was not copied to the Grid!
// NOTE(review): the file is removed through a shell "rm" with an unquoted file name.
1079 backupSto->IdToFilename(aLocId, filename);
1080 AliInfo(Form("Removing local file %s", filename.Data()));
1081 gSystem->Exec(Form("rm %s",filename.Data()));
1085 // If we get here, the file can be stored!
1086 Bool_t storeOk = gridSto->Put(aLocEntry);
1088 Log(fCurrentDetector.Data(),
1089 Form("TryToStoreAgain - Object <%s> successfully put into %s storage",
1090 aLocId.ToString().Data(), type));
1092 // removing local filename...
1094 backupSto->IdToFilename(aLocId, filename);
1095 AliInfo(Form("Removing local file %s", filename.Data()));
1096 gSystem->Exec(Form("rm %s", filename.Data()));
1099 Log(fCurrentDetector.Data(),
1100 Form("TryToStoreAgain - Grid %s storage of object <%s> failed again",
1101 type, aLocId.ToString().Data()));
1105 localEntries->Clear();
1110 //______________________________________________________________________________________________
// Opens an AliDCSClient connection to host:port (using fTimeout and fRetries)
// and fills valueSet with the values of `entry` between the current start and
// end time, through GetAliasValues() or GetDPValues().
// NOTE(review): braces, the declaration of `result`, the dispatch on `type`
// and the return statements are not visible in this listing.
1111 Bool_t AliShuttle::GetValueSet(const char* host, Int_t port, const char* entry,
1112 TObjArray* valueSet, DCSType type)
1114 // Retrieve all "entry" data points from the DCS server
1115 // host, port: TSocket connection parameters
1116 // entry: name of the alias or data point
1117 // valueSet: array of retrieved AliDCSValue's
1118 // type: kAlias or kDP
1120 AliDCSClient client(host, port, fTimeout, fRetries);
1121 if (!client.IsConnected())
// Query by alias name.
1130 result = client.GetAliasValues(entry,
1131 GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
// Query by data point name.
1135 result = client.GetDPValues(entry,
1136 GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
// Failure is logged with the client's error string for the result code.
1141 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get '%s'! Reason: %s",
1142 entry, AliDCSClient::GetErrorString(result)));
// For server errors the server-side message is logged as well.
1144 if (result == AliDCSClient::fgkServerError)
1146 Log(fCurrentDetector.Data(), Form("GetValueSet - Server error: %s",
1147 client.GetServerError().Data()));
1156 //______________________________________________________________________________________________
1157 const char* AliShuttle::GetFile(Int_t system, const char* detector,
1158 const char* id, const char* source)
1160 // Get calibration file from file exchange servers
1161 // calls specific getter according to system index (kDAQ, kDCS, kHLT)
1165 return GetDAQFileName(detector, id, source);
1168 return GetDCSFileName(detector, id, source);
1171 return GetHLTFileName(detector, id, source);
1174 AliError(Form("No valid system index: %d",system));
1180 //______________________________________________________________________________________________
1181 TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
1183 // Get sources producing the condition file Id from file exchange servers
1184 // calls specific getter according to system index (kDAQ, kDCS, kHLT)
1188 return GetDAQFileSources(detector, id);
1191 return GetDCSFileSources(detector, id);
1194 return GetHLTFileSources(detector, id);
1197 AliError(Form("No valid system index: %d",system));
1203 //______________________________________________________________________________________________
// Open (or reuse) the MySQL connection stored in fServer[system].
// Indices below 3 take their settings from the FXS db configuration; any other index
// uses the DAQ logbook settings (the visible code addresses that slot as fServer[3]).
// NOTE(review): this listing lacks several short lines (braces, branch heads, returns);
// compare with the repository copy before changing the logic.
1204 Bool_t AliShuttle::Connect(Int_t system)
1206 // Connect to MySQL Server of the system's FXS MySQL databases
1207 // DAQ Logbook, Shuttle Logbook and DAQ FXS db are on the same host
1209 // check connection: if already connected return
1210 if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
1212 TString dbHost, dbUser, dbPass, dbName;
1214 if (system < 3) // FXS db servers
1216 dbHost = Form("mysql://%s:%d", fConfig->GetFXSdbHost(system), fConfig->GetFXSdbPort(system));
1217 dbUser = fConfig->GetFXSdbUser(system);
1218 dbPass = fConfig->GetFXSdbPass(system);
1219 dbName = fConfig->GetFXSdbName(system);
1220 } else { // Run & Shuttle logbook servers
1221 // TODO Will the Shuttle logbook server be the same as the Run logbook server ???
1222 dbHost = Form("mysql://%s:%d", fConfig->GetDAQlbHost(), fConfig->GetDAQlbPort());
1223 dbUser = fConfig->GetDAQlbUser();
1224 dbPass = fConfig->GetDAQlbPass();
1225 dbName = fConfig->GetDAQlbDB();
// The connection is made without a database name; dbName is only used by GetTables below.
1228 fServer[system] = TSQLServer::Connect(dbHost.Data(), dbUser.Data(), dbPass.Data());
1229 if (!fServer[system] || !fServer[system]->IsConnected()) {
// Two different messages: one for an FXS database, one for the Run logbook.
1232 AliError(Form("Can't establish connection to FXS database for %s",
1233 AliShuttleInterface::GetSystemName(system)));
1235 AliError("Can't establish connection to Run logbook.");
// A server object that exists but is not connected is destroyed here.
// NOTE(review): no reset of fServer[system] is visible after the delete — confirm the
// pointer is cleared, since the early-return test above dereferences it.
1237 if(fServer[system]) delete fServer[system];
1242 // TODO in the configuration should the table name be there too?
1243 TSQLResult* aResult=0;
// GetTables(dbName) is issued on the server matching `system`; the DCS variant is disabled.
1246 aResult = fServer[kDAQ]->GetTables(dbName.Data());
1249 //aResult = fServer[kDCS]->GetTables(dbName.Data());
1252 aResult = fServer[kHLT]->GetTables(dbName.Data());
1255 aResult = fServer[3]->GetTables(dbName.Data());
1263 //______________________________________________________________________________________________
// Look up one file in the DAQ FXS table, copy it to the local temp directory and
// return the full local path.
// The returned pointer refers to a function-local static TString: it is overwritten by
// the next call to this function, so callers must copy the string if they keep it.
// NOTE(review): this listing lacks several short lines (braces, condition heads, returns,
// result clean-up, some argument continuations); compare with the repository copy.
1264 const char* AliShuttle::GetDAQFileName(const char* detector, const char* id, const char* source)
1266 // Retrieves a file from the DAQ FXS.
1267 // First queries the DAQ FXS database for the DAQ file name, using the run, detector, id and source info
1268 // then calls RetrieveDAQFile(DAQfilename) for actual copy to local disk
1269 // run: current run being processed (given by Logbook entry fLogbookEntry)
1270 // detector: the Preprocessor name
1271 // id: provided as a parameter by the Preprocessor
1272 // source: provided by the Preprocessor through GetFileSources function
1274 // check connection, in case connect
1277 Log(detector, "GetDAQFileName - Couldn't connect to DAQ FXS database");
1281 // Query preparation
// NOTE(review): detector, id and source are pasted into the SQL text unescaped.
1282 TString sqlQueryStart = Form("select filePath from %s where", fConfig->GetFXSdbTable(kDAQ));
1283 TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\"",
1284 GetCurrentRun(), detector, id, source);
1285 TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1287 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1290 TSQLResult* aResult = 0;
1291 aResult = dynamic_cast<TSQLResult*> (fServer[kDAQ]->Query(sqlQuery));
1293 Log(detector, Form("GetDAQFileName - Can't execute SQL query for: id = %s, source = %s",
// Exactly one row is expected: zero rows and more than one row are both logged as errors.
1298 if(aResult->GetRowCount() == 0)
1301 Form("GetDAQFileName - No entry in FXS table for: id = %s, source = %s",
1307 if (aResult->GetRowCount() > 1) {
1309 Form("GetDAQFileName - More than one entry in FXS table for: id = %s, source = %s",
1315 TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
1318 Log(detector, Form("GetDAQFileName - Empty set result from query: id = %s, source = %s",
// Field 0 is the filePath column selected above; the explicit length avoids relying on a terminator.
1324 TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
1329 AliDebug(2, Form("filePath = %s",filePath.Data()));
1331 // retrieved file is renamed to make it unique
1332 TString localFileName = Form("DAQ_%s_%d_%s_%s.shuttle",
1333 detector, GetCurrentRun(), id, source);
1335 // file retrieval from DAQ FXS
1336 Bool_t result = RetrieveDAQFile(filePath.Data(), localFileName.Data());
1338 Log(detector, Form("GetDAQFileName - Copy of file %s from DAQ FXS failed", filePath.Data()));
1341 AliInfo(Form("File %s copied from DAQ FXS into %s/%s",
1342 filePath.Data(), fgkShuttleTempDir, localFileName.Data()));
// Record the (id, source) pair as "id#!?!#source"; UpdateDAQTable reads fFXSlist[kDAQ] back.
1345 fFXSCalled[kDAQ]=kTRUE;
1346 TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, source));
1347 fFXSlist[kDAQ].Add(fileParams);
// Static storage keeps the returned character buffer alive after this function returns.
1349 static TString fullLocalFileName;
1350 fullLocalFileName = TString::Format("%s/%s", fgkShuttleTempDir, localFileName.Data());
1352 AliInfo(Form("fullLocalFileName = %s", fullLocalFileName.Data()));
1354 return fullLocalFileName.Data();
1358 //______________________________________________________________________________________________
1359 Bool_t AliShuttle::RetrieveDAQFile(const char* daqFileName, const char* localFileName)
1361 // Copies file from DAQ FXS to local Shuttle machine
1363 // check temp directory: trying to cd to temp; if it does not exist, create it
1364 AliDebug(2, Form("Copy file %s from DAQ FXS into %s/%s",
1365 daqFileName, fgkShuttleTempDir, localFileName));
1367 void* dir = gSystem->OpenDirectory(fgkShuttleTempDir);
1369 if (gSystem->mkdir(fgkShuttleTempDir, kTRUE)) {
1370 AliError(Form("Can't open directory <%s>", fgkShuttleTempDir));
1375 gSystem->FreeDirectory(dir);
1378 TString baseDAQFXSFolder = "FES";
1379 TString command = Form("scp -oPort=%d -2 %s@%s:%s/%s %s/%s",
1380 fConfig->GetFXSPort(kDAQ),
1381 fConfig->GetFXSUser(kDAQ),
1382 fConfig->GetFXSHost(kDAQ),
1383 baseDAQFXSFolder.Data(),
1388 AliDebug(2, Form("%s",command.Data()));
1390 UInt_t nRetries = 0;
1391 UInt_t maxRetries = 3;
1393 // copy!! if successful TSystem::Exec returns 0
1394 while(nRetries++ < maxRetries) {
1395 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
1396 if(gSystem->Exec(command.Data()) == 0) return kTRUE;
1403 //______________________________________________________________________________________________
// Query the DAQ FXS table for every DAQsource that produced file `id` for the current
// run and detector, and return them as a newly allocated TList of TObjString.
// NOTE(review): the list is created with `new`; presumably the caller must delete it —
// confirm. This listing also lacks several short lines (braces, condition heads, returns,
// the declaration of aRow); compare with the repository copy.
1404 TList* AliShuttle::GetDAQFileSources(const char* detector, const char* id)
1406 // Retrieves list of DAQ sources of file Id
1408 // check connection, in case connect
1410 Log(detector, "GetDAQFileSources - Couldn't connect to DAQ FXS database");
1414 // Query preparation
// NOTE(review): detector and id are pasted into the SQL text unescaped.
1415 TString sqlQueryStart = Form("select DAQsource from %s where", fConfig->GetFXSdbTable(kDAQ));
1416 TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1417 GetCurrentRun(), detector, id);
1418 TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1420 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1423 TSQLResult* aResult;
1424 aResult = fServer[kDAQ]->Query(sqlQuery);
1426 Log(detector, Form("GetDAQFileSources - Can't execute SQL query for id: %s", id));
1430 if (aResult->GetRowCount() == 0) {
1432 Form("GetDAQFileSources - No entry in FXS table for id: %s", id));
1438 TList *list = new TList();
// One TObjString per result row; field 0 is the DAQsource column selected above.
1441 while((aRow = aResult->Next())){
1443 TString daqSource(aRow->GetField(0), aRow->GetFieldLength(0));
1444 AliDebug(2, Form("daqSource = %s", daqSource.Data()));
1445 list->Add(new TObjString(daqSource));
1454 //______________________________________________________________________________________________
1455 const char* AliShuttle::GetDCSFileName(const char* /*detector*/, const char* /*id*/, const char* /*source*/){
1456 // Retrieves a file from the DCS FXS.
1458 return "You're in DCS";
1462 //______________________________________________________________________________________________
// DCS counterpart of GetDAQFileSources / GetHLTFileSources; both parameters are unnamed
// and therefore unused.
// NOTE(review): the function body is not present in this listing — restore it from the
// repository copy.
1463 TList* AliShuttle::GetDCSFileSources(const char* /*detector*/, const char* /*id*/){
1464 // Retrieves file sources from the DCS FXS.
1470 //______________________________________________________________________________________________
// Look up one file in the HLT FXS table, copy it to the local temp directory, compare
// its md5sum with the checksum stored in the table and return the full local path.
// The returned pointer refers to a function-local static TString: it is overwritten by
// the next call to this function, so callers must copy the string if they keep it.
// NOTE(review): this listing lacks several short lines (braces, condition heads, returns,
// result clean-up, some argument continuations); compare with the repository copy.
1471 const char* AliShuttle::GetHLTFileName(const char* detector, const char* id, const char* source){
1472 // Retrieves a file from the HLT FXS.
1473 // First queries the HLT FXS database for the HLT file name, using the run, detector, id and source info
1474 // then calls RetrieveHLTFile(HLTfilename) for actual copy to local disk
1475 // run: current run being processed (given by Logbook entry fLogbookEntry)
1476 // detector: the Preprocessor name
1477 // id: provided as a parameter by the Preprocessor
1478 // source: provided by the Preprocessor through GetFileSources function
1480 // check connection, in case connect
1483 Log(detector, "GetHLTFileName - Couldn't connect to HLT FXS database");
1487 // Query preparation
// NOTE(review): detector, id and source are pasted into the SQL text unescaped.
1488 TString sqlQueryStart = Form("select filePath,fileSize,fileChecksum from %s where",
1489 fConfig->GetFXSdbTable(kHLT));
1490 TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\"",
1491 GetCurrentRun(), detector, id, source);
1492 TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1494 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1497 TSQLResult* aResult = 0;
1498 aResult = dynamic_cast<TSQLResult*> (fServer[kHLT]->Query(sqlQuery));
1500 Log(detector, Form("GetHLTFileName - Can't execute SQL query for: id = %s, source = %s",
// Exactly one row with exactly three fields is expected; anything else is logged as an error.
1505 if(aResult->GetRowCount() == 0)
1508 Form("GetHLTFileName - No entry in FXS table for: id = %s, source = %s",
1514 if (aResult->GetRowCount() > 1) {
1516 Form("GetHLTFileName - More than one entry in FXS table for: id = %s, source = %s",
1522 if (aResult->GetFieldCount() != 3) {
1524 Form("GetHLTFileName - Wrong field count in FXS table for: id = %s, source = %s",
1530 TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
1533 Log(detector, Form("GetHLTFileName - Empty set result from query: id = %s, source = %s",
// Fields follow the select order: 0 = filePath, 1 = fileSize, 2 = fileChecksum.
// fileSize is read here but not used in any line visible in this listing.
1539 TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
1540 TString fileSize(aRow->GetField(1), aRow->GetFieldLength(1));
1541 TString fileMd5Sum(aRow->GetField(2), aRow->GetFieldLength(2));
1546 AliDebug(2, Form("filePath = %s",filePath.Data()));
1548 // The full file path in HLT FXS is runNb/DET/DDLnumber/filePath
1549 // TString fullFilePath = Form("%d/%s/%s/%s", GetCurrentRun(), detector, source, filePath.Data());
1551 // retrieved file is renamed to make it unique
1552 TString localFileName = Form("HLT_%s_%d_%s_%s.shuttle",
1553 detector, GetCurrentRun(), id, source);
1555 // file retrieval from HLT FXS
1556 Bool_t result = RetrieveHLTFile(filePath.Data(), localFileName.Data());
1559 Log(detector, Form("GetHLTFileName - Copy of file %s from HLT FXS failed", filePath.Data()));
1562 AliInfo(Form("File %s copied from HLT FXS into %s/%s",
1563 filePath.Data(), fgkShuttleTempDir, localFileName.Data()));
1566 // compare md5sum of local file with the one stored in the HLT DB
// The shell pipeline's exit status is grep's: 0 when the stored checksum appears in the md5sum output.
// NOTE(review): the test on md5Comp itself is not visible in this listing.
1567 Int_t md5Comp = gSystem->Exec(Form("md5sum %s/%s |grep %s 2>&1 > /dev/null",
1568 fgkShuttleTempDir, localFileName.Data(), fileMd5Sum.Data()));
1572 Log(detector, Form("GetHLTFileName - md5sum of file %s does not match with local copy!", filePath.Data()));
// Record the (id, source) pair as "id#!?!#source"; UpdateHLTTable reads fFXSlist[kHLT] back.
1576 fFXSCalled[kHLT]=kTRUE;
1577 TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, source));
1578 fFXSlist[kHLT].Add(fileParams);
// Static storage keeps the returned character buffer alive after this function returns.
1580 static TString fullLocalFileName;
1581 fullLocalFileName = TString::Format("%s/%s", fgkShuttleTempDir, localFileName.Data());
1583 AliInfo(Form("fullLocalFileName = %s", fullLocalFileName.Data()));
1585 return fullLocalFileName.Data();
1589 //______________________________________________________________________________________________
1590 Bool_t AliShuttle::RetrieveHLTFile(const char* hltFileName, const char* localFileName)
1592 // Copies file from HLT FXS to local Shuttle machine
1594 // check temp directory: trying to cd to temp; if it does not exist, create it
1595 AliDebug(2, Form("Copy file %s from HLT FXS into %s/%s",
1596 hltFileName, fgkShuttleTempDir, localFileName));
1598 void* dir = gSystem->OpenDirectory(fgkShuttleTempDir);
1600 if (gSystem->mkdir(fgkShuttleTempDir, kTRUE)) {
1601 AliError(Form("Can't open directory <%s>", fgkShuttleTempDir));
1606 gSystem->FreeDirectory(dir);
1609 TString baseHLTFXSFolder = "~";
1610 TString command = Form("scp -oPort=%d %s@%s:%s/%s %s/%s",
1611 fConfig->GetFXSPort(kHLT),
1612 fConfig->GetFXSUser(kHLT),
1613 fConfig->GetFXSHost(kHLT),
1614 baseHLTFXSFolder.Data(),
1619 AliDebug(2, Form("%s",command.Data()));
1621 UInt_t nRetries = 0;
1622 UInt_t maxRetries = 3;
1624 // copy!! if successful TSystem::Exec returns 0
1625 while(nRetries++ < maxRetries) {
1626 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
1627 if(gSystem->Exec(command.Data()) == 0) return kTRUE;
1634 //______________________________________________________________________________________________
// Query the HLT FXS table for every DDLnumbers value attached to file `id` for the
// current run and detector, and return them as a newly allocated TList of TObjString.
// NOTE(review): the list is created with `new`; presumably the caller must delete it —
// confirm. This listing also lacks several short lines (braces, condition heads, returns,
// the declaration of aRow); compare with the repository copy.
1635 TList* AliShuttle::GetHLTFileSources(const char* detector, const char* id){
1636 // Retrieves list of HLT sources (DDLnumbers) of file Id
1638 // check connection, in case connect
1640 Log(detector, "GetHLTFileSources - Couldn't connect to HLT FXS database");
1644 // Query preparation
// NOTE(review): detector and id are pasted into the SQL text unescaped.
1645 TString sqlQueryStart = Form("select DDLnumbers from %s where", fConfig->GetFXSdbTable(kHLT));
1646 TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1647 GetCurrentRun(), detector, id);
1648 TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1650 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1653 TSQLResult* aResult;
1654 aResult = fServer[kHLT]->Query(sqlQuery);
1656 Log(detector, Form("GetHLTFileSources - Can't execute SQL query for id: %s", id));
1660 if (aResult->GetRowCount() == 0) {
1662 Form("GetHLTFileSources - No entry in FXS table for id: %s", id));
1668 TList *list = new TList();
// One TObjString per result row; field 0 is the DDLnumbers column selected above.
1671 while((aRow = aResult->Next())){
1673 TString ddlNumbers(aRow->GetField(0), aRow->GetFieldLength(0));
1674 AliDebug(2, Form("DDLnumbers = %s", ddlNumbers.Data()));
1675 list->Add(new TObjString(ddlNumbers));
1684 //______________________________________________________________________________________________
// For every "id#!?!#source" entry stored in fFXSlist[kDAQ], set time_processed in the
// matching DAQ FXS table row (current run, current detector) to the current time.
// NOTE(review): this listing lacks several short lines (braces, condition heads, returns,
// clean-up of aFXSarray / aResult on the normal path); compare with the repository copy.
1685 Bool_t AliShuttle::UpdateDAQTable()
1687 // Update DAQ table filling time_processed field in all rows corresponding to current run and detector
1689 // check connection, in case connect
1691 Log(fCurrentDetector, "UpdateDAQTable - Couldn't connect to DAQ FXS database");
// Default-constructed time stamp; the same value is written for every entry of the loop.
1695 TTimeStamp now; // now
1697 // Loop on FXS list entries
1698 TIter iter(&fFXSlist[kDAQ]);
1699 TObjString *aFXSentry=0;
1700 while((aFXSentry = dynamic_cast<TObjString*> (iter.Next()))){
1701 TString aFXSentrystr = aFXSentry->String();
// NOTE(review): TString::Tokenize treats its argument as a set of delimiter characters,
// so a file id or source containing '#', '!' or '?' would yield more than two tokens
// and be rejected below — confirm such values cannot occur.
1702 TObjArray *aFXSarray = aFXSentrystr.Tokenize("#!?!#");
1703 if(!aFXSarray || aFXSarray->GetEntries() != 2 ) {
1704 Log(fCurrentDetector, Form("UpdateDAQTable - error updating FXS entry. Check string: <%s>",
1705 aFXSentrystr.Data()));
1706 if(aFXSarray) delete aFXSarray;
// Token 0 is the file id, token 1 the DAQ source (same order as written by GetDAQFileName).
1709 const char* fileId = ((TObjString*) aFXSarray->At(0))->GetName();
1710 const char* daqSource = ((TObjString*) aFXSarray->At(1))->GetName();
1711 TString whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
1712 GetCurrentRun(), fCurrentDetector.Data(), fileId, daqSource);
// NOTE(review): now.GetSec() is formatted with %d — confirm its return type is int-sized.
1716 TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(kDAQ),
1717 now.GetSec(), whereClause.Data());
1719 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1722 TSQLResult* aResult;
1723 aResult = dynamic_cast<TSQLResult*> (fServer[kDAQ]->Query(sqlQuery));
1725 Log(fCurrentDetector, Form("UpdateDAQTable - Can't execute SQL query <%s>", sqlQuery.Data()));
1734 //______________________________________________________________________________________________
// For every "id#!?!#source" entry stored in fFXSlist[kHLT], set time_processed in the
// matching HLT FXS table row (current run, current detector) to the current time.
// NOTE(review): this listing lacks several short lines (braces, condition heads, returns,
// clean-up of aFXSarray / aResult on the normal path); compare with the repository copy.
1735 Bool_t AliShuttle::UpdateHLTTable()
1737 // Update HLT table filling time_processed field in all rows corresponding to current run and detector
1739 // check connection, in case connect
1741 Log(fCurrentDetector, "UpdateHLTTable - Couldn't connect to HLT FXS database");
// Default-constructed time stamp; the same value is written for every entry of the loop.
1745 TTimeStamp now; // now
1747 // Loop on FXS list entries
1748 TIter iter(&fFXSlist[kHLT]);
1749 TObjString *aFXSentry=0;
1750 while((aFXSentry = dynamic_cast<TObjString*> (iter.Next()))){
1751 TString aFXSentrystr = aFXSentry->String();
// NOTE(review): TString::Tokenize treats its argument as a set of delimiter characters,
// so a file id or source containing '#', '!' or '?' would yield more than two tokens
// and be rejected below — confirm such values cannot occur.
1752 TObjArray *aFXSarray = aFXSentrystr.Tokenize("#!?!#");
1753 if(!aFXSarray || aFXSarray->GetEntries() != 2 ) {
1754 Log(fCurrentDetector, Form("UpdateHLTTable - error updating FXS entry. Check string: <%s>",
1755 aFXSentrystr.Data()));
1756 if(aFXSarray) delete aFXSarray;
// Token 0 is the file id, token 1 the HLT source (same order as written by GetHLTFileName).
1759 const char* fileId = ((TObjString*) aFXSarray->At(0))->GetName();
1760 const char* hltSource = ((TObjString*) aFXSarray->At(1))->GetName();
1761 TString whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\";",
1762 GetCurrentRun(), fCurrentDetector.Data(), fileId, hltSource);
// NOTE(review): now.GetSec() is formatted with %d — confirm its return type is int-sized.
1766 TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(kHLT),
1767 now.GetSec(), whereClause.Data());
1769 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1772 TSQLResult* aResult;
1773 aResult = dynamic_cast<TSQLResult*> (fServer[kHLT]->Query(sqlQuery));
1775 Log(fCurrentDetector, Form("UpdateHLTTable - Can't execute SQL query <%s>", sqlQuery.Data()));
1784 //______________________________________________________________________________________________
// Write the processing state of the current run into table logbook_shuttle.
// With detector == "shuttle_done" the run is flagged as finished (shuttle_done=1);
// otherwise the column named after the detector is set to `status`, which must contain
// "done" or "failed" (case-insensitive).
// NOTE(review): this listing lacks several short lines (braces, the declaration of
// setClause, condition heads, returns); compare with the repository copy.
1785 Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
1787 // Update Shuttle logbook filling detector or shuttle_done column
1788 // ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
1790 // check connection, in case connect
1792 Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook.");
1796 TString detName(detector);
1798 if(detName == "shuttle_done") {
1799 setClause = "set shuttle_done=1";
1801 TString statusStr(status);
// Substring match: any status text containing "done" or "failed" is accepted.
1802 if(statusStr.Contains("done", TString::kIgnoreCase) ||
1803 statusStr.Contains("failed", TString::kIgnoreCase)){
// The detector name is used directly as the column name and status as its value, both unescaped.
1804 setClause = Form("set %s=\"%s\"", detector, status);
1807 Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s",
1813 TString whereClause = Form("where run=%d", GetCurrentRun());
1815 TString sqlQuery = Form("update logbook_shuttle %s %s",
1816 setClause.Data(), whereClause.Data());
1818 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1821 TSQLResult* aResult;
// Index 3 is the logbook connection slot (hard-coded here).
1822 aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
1824 Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data()));
1832 //______________________________________________________________________________________________
1833 Int_t AliShuttle::GetCurrentRun() const
1835 // Get current run from logbook entry
1837 return fLogbookEntry ? fLogbookEntry->GetRun() : -1;
1840 //______________________________________________________________________________________________
1841 UInt_t AliShuttle::GetCurrentStartTime() const
1843 // get current start time
1845 return fLogbookEntry ? fLogbookEntry->GetStartTime() : 0;
1848 //______________________________________________________________________________________________
1849 UInt_t AliShuttle::GetCurrentEndTime() const
1851 // get current end time from logbook entry
1853 return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
1856 //______________________________________________________________________________________________
1857 void AliShuttle::Log(const char* detector, const char* message)
1859 // Fill log string with a message
1861 void* dir = gSystem->OpenDirectory(fgkShuttleLogDir);
1863 if (gSystem->mkdir(fgkShuttleLogDir, kTRUE)) {
1864 AliError(Form("Can't open directory <%s>", fgkShuttleTempDir));
1869 gSystem->FreeDirectory(dir);
1872 TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
1873 if(GetCurrentRun()>=0 ) toLog += Form("run %d - ", GetCurrentRun());
1874 toLog += Form("%s", message);
1876 AliInfo(toLog.Data());
1879 fileName.Form("%s/%s.log", fgkShuttleLogDir, detector);
1880 gSystem->ExpandPathName(fileName);
1883 logFile.open(fileName, ofstream::out | ofstream::app);
1885 if (!logFile.is_open()) {
1886 AliError(Form("Could not open file %s", fileName.Data()));
1890 logFile << toLog.Data() << "\n";
1895 //______________________________________________________________________________________________
// Entry point of a Shuttle pass: selects the logbook entries still to be processed
// (shuttle_done=0, optionally restricted to one run), initialises fFirstUnprocessed and
// hands the entries to RetrieveConditionsData.
// NOTE(review): this listing lacks several short lines (braces, the tests on `run`,
// returns); compare with the repository copy before changing the logic.
1896 Bool_t AliShuttle::Collect(Int_t run)
1899 // Collects conditions data for all UNPROCESSED run written to DAQ LogBook in case of run = -1 (default)
1900 // If a dedicated run is given this run is processed
1902 // In operational mode, this is the Shuttle function triggered by the EOR signal.
// Two alternative start messages: all unprocessed runs, or one dedicated run.
1906 Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs");
1908 Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run));
1910 SetLastAction("Starting");
1912 TString whereClause("where shuttle_done=0");
// Run restriction (presumably only appended when a dedicated run was requested).
1914 whereClause += Form(" and run=%d", run);
1916 TObjArray shuttleLogbookEntries;
1917 if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries))
1919 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
1923 if (shuttleLogbookEntries.GetEntries() == 0)
1926 Log("SHUTTLE","Collect - Found no UNPROCESSED runs in Shuttle logbook");
1928 Log("SHUTTLE", Form("Collect - Run %d is already DONE "
1929 "or it does not exist in Shuttle logbook", run));
// Start from "first unprocessed" for every detector; cleared below where an earlier
// run still has that detector unprocessed.
1933 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1934 fFirstUnprocessed[iDet] = kTRUE;
1938 // query Shuttle logbook for earlier runs, check if some detectors are unprocessed,
1939 // flag them into fFirstUnprocessed array
// NOTE(review): second variable named whereClause — presumably declared in a nested
// scope that shadows the one above; confirm.
1940 TString whereClause(Form("where shuttle_done=0 and run < %d", run));
1941 TObjArray tmpLogbookEntries;
1942 if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries))
1944 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
1948 TIter iter(&tmpLogbookEntries);
1949 AliShuttleLogbookEntry* anEntry = 0;
1950 while ((anEntry = dynamic_cast<AliShuttleLogbookEntry*> (iter.Next())))
1952 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1954 if (anEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
1956 AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
1957 anEntry->GetRun(), GetDetName(iDet)));
1958 fFirstUnprocessed[iDet] = kFALSE;
// Process the selected entries; a failure of any run is logged.
1966 if (!RetrieveConditionsData(shuttleLogbookEntries))
1968 Log("SHUTTLE", "Collect - Process of at least one run failed");
1975 //______________________________________________________________________________________________
1976 Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries)
1978 // Retrieve conditions data for all runs that aren't processed yet
1980 Bool_t hasError = kFALSE;
1982 TIter iter(&dateEntries);
1983 AliShuttleLogbookEntry* anEntry;
1985 while ((anEntry = (AliShuttleLogbookEntry*) iter.Next())){
1986 if (!Process(anEntry)){
1991 return hasError == kFALSE;
1994 //______________________________________________________________________________________________
1995 ULong_t AliShuttle::GetTimeOfLastAction() const
1999 fMonitoringMutex->Lock();
2001 tmp = fLastActionTime;
2003 fMonitoringMutex->UnLock();
2008 //______________________________________________________________________________________________
2009 const TString AliShuttle::GetLastAction() const
2011 // returns a string description of the last action
2015 fMonitoringMutex->Lock();
2019 fMonitoringMutex->UnLock();
2024 //______________________________________________________________________________________________
2025 void AliShuttle::SetLastAction(const char* action)
2027 // updates the monitoring variables
2029 fMonitoringMutex->Lock();
2031 fLastAction = action;
2032 fLastActionTime = time(0);
2034 fMonitoringMutex->UnLock();
2037 //______________________________________________________________________________________________
2038 const char* AliShuttle::GetRunParameter(const char* param)
2040 // returns run parameter read from DAQ logbook
2042 if(!fLogbookEntry) {
2043 AliError("No logbook entry!");
2047 return fLogbookEntry->GetRunParameter(param);
2050 //______________________________________________________________________________________________
2051 Bool_t AliShuttle::SendMail()
2053 // sends a mail to the subdetector expert in case of preprocessor error
2055 void* dir = gSystem->OpenDirectory(fgkShuttleLogDir);
2058 if (gSystem->mkdir(fgkShuttleLogDir, kTRUE))
2060 AliError(Form("Can't open directory <%s>", fgkShuttleTempDir));
2065 gSystem->FreeDirectory(dir);
2068 TString bodyFileName;
2069 bodyFileName.Form("%s/mail.body", fgkShuttleLogDir);
2070 gSystem->ExpandPathName(bodyFileName);
2073 mailBody.open(bodyFileName, ofstream::out);
2075 if (!mailBody.is_open())
2077 AliError(Form("Could not open mail body file %s", bodyFileName.Data()));
2082 TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
2083 TObjString *anExpert=0;
2084 while ((anExpert = (TObjString*) iterExperts.Next()))
2086 to += Form("%s,", anExpert->GetName());
2088 to.Remove(to.Length()-1);
2089 AliInfo(Form("to: %s",to.Data()));
2091 TString cc="alberto.colla@cern.ch";
2093 TString subject = Form("%s Shuttle preprocessor error in run %d !",
2094 fCurrentDetector.Data(), GetCurrentRun());
2095 AliInfo(Form("subject: %s", subject.Data()));
2097 TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
2098 body += Form("SHUTTLE just detected that your preprocessor "
2099 "exited with ERROR state in run %d !!\n\n", GetCurrentRun());
2100 body += Form("Please check %s status on the web page asap!\n\n", fCurrentDetector.Data());
2101 body += Form("The last 10 lines of %s log file are following:\n\n");
2103 AliInfo(Form("Body begin: %s", body.Data()));
2105 mailBody << body.Data();
2107 mailBody.open(bodyFileName, ofstream::out | ofstream::app);
2109 TString logFileName = Form("%s/%s.log", fgkShuttleLogDir, fCurrentDetector.Data());
2110 TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
2111 if (gSystem->Exec(tailCommand.Data()))
2113 mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
2116 TString endBody = Form("------------------------------------------------------\n\n");
2117 endBody += Form("In case of problems please contact the SHUTTLE core team!\n\n");
2118 endBody += "Please do not answer this message directly, it is automatically generated!\n\n";
2119 endBody += "Sincerely yours,\n\n \t\t\tthe SHUTTLE\n";
2121 AliInfo(Form("Body end: %s", endBody.Data()));
2123 mailBody << endBody.Data();
2128 TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
2132 bodyFileName.Data());
2133 AliInfo(Form("mail command: %s", mailCommand.Data()));
2135 Bool_t result = gSystem->Exec(mailCommand.Data());