1 /**************************************************************************
2 * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
4 * Author: The ALICE Off-line Project. *
5 * Contributors are mentioned in the code where appropriate. *
7 * Permission to use, copy, modify and distribute this software and its *
8 * documentation strictly for non-commercial purposes is hereby granted *
9 * without fee, provided that the above copyright notice appears in all *
10 * copies and that both the copyright notice and this permission notice *
11 * appear in the supporting documentation. The authors make no claims *
12 * about the suitability of this software for any purpose. It is *
13 * provided "as is" without express or implied warranty. *
14 **************************************************************************/
18 Revision 1.30 2007/02/13 11:23:21 acolla
19 Moved getters and setters of Shuttle's main OCDB/Reference, local
20 OCDB/Reference, temp and log folders to AliShuttleInterface
22 Revision 1.27 2007/01/30 17:52:42 jgrosseo
23 adding monalisa monitoring
25 Revision 1.26 2007/01/23 19:20:03 acolla
26 Removed old ldif files, added TOF, MCH ldif files. Added some options in
27 AliShuttleConfig::Print. Added in Ali Shuttle: SetShuttleTempDir and
30 Revision 1.25 2007/01/15 19:13:52 acolla
31 Moved some AliInfo to AliDebug in SendMail function
33 Revision 1.21 2006/12/07 08:51:26 jgrosseo
35 table, db names in ldap configuration
36 added GRP preprocessor
37 DCS data can also be retrieved by data point
39 Revision 1.20 2006/11/16 16:16:48 jgrosseo
40 introducing strict run ordering flag
41 removed giving preprocessor name to preprocessor, they have to know their name themselves ;-)
43 Revision 1.19 2006/11/06 14:23:04 jgrosseo
44 major update (Alberto)
45 o) reading of run parameters from the logbook
46 o) online offline naming conversion
47 o) standalone DCSclient package
49 Revision 1.18 2006/10/20 15:22:59 jgrosseo
50 o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
51 o) Merging Collect, CollectAll, CollectNew function
52 o) Removing implementation of empty copy constructors (declaration still there!)
54 Revision 1.17 2006/10/05 16:20:55 jgrosseo
55 adapting to new CDB classes
57 Revision 1.16 2006/10/05 15:46:26 jgrosseo
58 applying to the new interface
60 Revision 1.15 2006/10/02 16:38:39 jgrosseo
63 storing of objects that failed to be stored to the grid before
64 interfacing of shuttle status table in daq system
66 Revision 1.14 2006/08/29 09:16:05 jgrosseo
69 Revision 1.13 2006/08/15 10:50:00 jgrosseo
70 effc++ corrections (alberto)
72 Revision 1.12 2006/08/08 14:19:29 jgrosseo
73 Update to shuttle classes (Alberto)
75 - Possibility to set the full object's path in the Preprocessor's and
76 Shuttle's Store functions
77 - Possibility to extend the object's run validity in the same classes
78 ("startValidity" and "validityInfinite" parameters)
79 - Implementation of the StoreReferenceData function to store reference
80 data in a dedicated CDB storage.
82 Revision 1.11 2006/07/21 07:37:20 jgrosseo
83 last run is stored after each run
85 Revision 1.10 2006/07/20 09:54:40 jgrosseo
86 introducing status management: The processing per subdetector is divided into several steps,
87 after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
88 can keep track of the number of failures and skips further processing after a certain threshold is
89 exceeded. These thresholds can be configured in LDAP.
91 Revision 1.9 2006/07/19 10:09:55 jgrosseo
92 new configuration, accesst to DAQ FES (Alberto)
94 Revision 1.8 2006/07/11 12:44:36 jgrosseo
95 adding parameters for extended validity range of data produced by preprocessor
97 Revision 1.7 2006/07/10 14:37:09 jgrosseo
98 small fix + todo comment
100 Revision 1.6 2006/07/10 13:01:41 jgrosseo
101 enhanced storing of last sucessfully processed run (alberto)
103 Revision 1.5 2006/07/04 14:59:57 jgrosseo
104 revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
106 Revision 1.4 2006/06/12 09:11:16 jgrosseo
107 coding conventions (Alberto)
109 Revision 1.3 2006/06/06 14:26:40 jgrosseo
110 o) removed files that were moved to STEER
111 o) shuttle updated to follow the new interface (Alberto)
113 Revision 1.2 2006/03/07 07:52:34 hristov
114 New version (B.Yordanov)
116 Revision 1.6 2005/11/19 17:19:14 byordano
117 RetrieveDATEEntries and RetrieveConditionsData added
119 Revision 1.5 2005/11/19 11:09:27 byordano
120 AliShuttle declaration added
122 Revision 1.4 2005/11/17 17:47:34 byordano
123 TList changed to TObjArray
125 Revision 1.3 2005/11/17 14:43:23 byordano
128 Revision 1.1.1.1 2005/10/28 07:33:58 hristov
129 Initial import as subdirectory in AliRoot
131 Revision 1.2 2005/09/13 08:41:15 byordano
132 default startTime endTime added
134 Revision 1.4 2005/08/30 09:13:02 byordano
137 Revision 1.3 2005/08/29 21:15:47 byordano
143 // This class is the main manager for AliShuttle.
144 // It organizes the data retrieval from DCS and call the
145 // interface methods of AliPreprocessor.
146 // For every detector in the configuration (see AliShuttleConfig),
147 // data for its set of aliases is retrieved. If there is a registered
148 // AliPreprocessor for this detector then it will be used
149 // according to the schema (see AliPreprocessor).
150 // If there isn't a registered AliPreprocessor then the retrieved
151 // data is stored automatically to the underlying AliCDBStorage.
152 // For detSpec is used the alias name.
155 #include "AliShuttle.h"
157 #include "AliCDBManager.h"
158 #include "AliCDBStorage.h"
159 #include "AliCDBId.h"
160 #include "AliCDBRunRange.h"
161 #include "AliCDBPath.h"
162 #include "AliCDBEntry.h"
163 #include "AliShuttleConfig.h"
164 #include "DCSClient/AliDCSClient.h"
166 #include "AliPreprocessor.h"
167 #include "AliShuttleStatus.h"
168 #include "AliShuttleLogbookEntry.h"
173 #include <TTimeStamp.h>
174 #include <TObjString.h>
175 #include <TSQLServer.h>
176 #include <TSQLResult.h>
180 #include <TMonaLisaWriter.h>
184 #include <sys/types.h>
185 #include <sys/wait.h>
// Definition of the static flag fgkProcessDCS, initialised to kTRUE.
// NOTE(review): presumably the switch behind the "exclude DCS archive DB query" /
// "Skipping DCS processing!" path in ProcessCurrentDetector - the line that tests it
// is not visible in this listing, confirm.
189 Bool_t AliShuttle::fgkProcessDCS(kTRUE);
191 //______________________________________________________________________________________________
// Constructor.
// Stores the DCS client timeout and retry count, aborts (AliFatal) when the
// configuration reports itself invalid, and initialises the per-system and
// per-detector bookkeeping.
// NOTE(review): the embedded line numbers skip (193 -> 195 -> 207 ...), so part of the
// member initializer list and the braces are not visible in this listing.
192 AliShuttle::AliShuttle(const AliShuttleConfig* config,
193 UInt_t timeout, Int_t retries):
195 fTimeout(timeout), fRetries(retries),
207 // config: AliShuttleConfig used
208 // timeout: timeout used for AliDCSClient connection
209 // retries: the number of retries in case of connection error.
// An invalid configuration is treated as fatal.
212 if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
// Four system slots are initialised here; each FXS list owns its entries.
213 for(int iSys=0;iSys<4;iSys++) {
216 fFXSlist[iSys].SetOwner(kTRUE);
// The preprocessor map owns the objects registered in it.
218 fPreprocessorMap.SetOwner(kTRUE);
// Every detector starts with its "first unprocessed" flag cleared.
220 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
221 fFirstUnprocessed[iDet] = kFALSE;
// Mutex allocated here and released in the destructor.
223 fMonitoringMutex = new TMutex();
226 //______________________________________________________________________________________________
// Destructor.
// Deletes the registered preprocessors, closes and deletes the four server
// connections and releases the monitoring mutex.
227 AliShuttle::~AliShuttle()
231 fPreprocessorMap.DeleteAll();
// NOTE(review): embedded line 233 is missing between the loop header and Close();
// presumably a null check on fServer[iSys] - confirm against the full file.
232 for(int iSys=0;iSys<4;iSys++)
234 fServer[iSys]->Close();
235 delete fServer[iSys];
// Release the mutex created in the constructor and reset the pointer.
244 if (fMonitoringMutex)
246 delete fMonitoringMutex;
247 fMonitoringMutex = 0;
251 //______________________________________________________________________________________________
// Adds a preprocessor to fPreprocessorMap, keyed by the name it reports.
// preprocessor: the AliPreprocessor to register; its GetName() must be a
//               detector name known to GetDetPos(), otherwise AliFatal is raised.
252 void AliShuttle::RegisterPreprocessor(AliPreprocessor* preprocessor)
255 // Registers new AliPreprocessor.
256 // It uses GetName() as the identifier of the preprocessor.
257 // The preprocessor is registered if there isn't any other
258 // with the same identifier (GetName()).
261 const char* detName = preprocessor->GetName();
// A negative detector position means the name is not a known detector.
262 if(GetDetPos(detName) < 0)
263 AliFatal(Form("********** !!!!! Invalid detector name: %s !!!!! **********", detName));
// A second registration under the same name only produces a warning.
265 if (fPreprocessorMap.GetValue(detName)) {
266 AliWarning(Form("AliPreprocessor %s is already registered!", detName));
// The map key is a new TObjString holding the detector name.
270 fPreprocessorMap.Add(new TObjString(detName), preprocessor);
272 //______________________________________________________________________________________________
// Stores an object for offline reconstruction by forwarding to WriteToCDB with
// the main and local CDB URIs (fgkMainCDB / fgkLocalCDB).
// path:             CDB path of the object
// object:           the object to store
// metaData:         metadata associated with the object
// validityStart:    forwarded unchanged to WriteToCDB
// validityInfinite: forwarded unchanged to WriteToCDB
// Returns whatever WriteToCDB returns.
273 UInt_t AliShuttle::Store(const AliCDBPath& path, TObject* object,
274 AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite)
276 // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for
277 // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored
278 // using this function. Use StoreReferenceData instead!
279 // It calls the WriteToCDB function, which performs the actual storage
281 return WriteToCDB(fgkMainCDB, fgkLocalCDB, path, object,
282 metaData, validityStart, validityInfinite);
286 //______________________________________________________________________________________________
// Stores an object as reference data by forwarding to WriteToCDB with the main
// and local reference storage URIs (fgkMainRefStorage / fgkLocalRefStorage).
// No validity arguments are passed, so WriteToCDB's defaults for them apply.
// Returns whatever WriteToCDB returns.
287 UInt_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData)
289 // Stores a CDB object in the storage for reference data. These objects will not be available during
290 // offline reconstruction. Use this function for reference data only!
291 // It calls the WriteToCDB function, which performs the actual storage
293 return WriteToCDB(fgkMainRefStorage, fgkLocalRefStorage, path, object, metaData);
297 //______________________________________________________________________________________________
// Writes an object to the main storage and, when that does not work, to the
// backup storage. Shared implementation behind Store and StoreReferenceData.
// NOTE(review): the embedded line numbers skip throughout this function; the guards
// between the visible statements (e.g. around the firstRun check and the fallback to
// the backup storage) are not visible in this listing.
298 UInt_t AliShuttle::WriteToCDB(const char* mainUri, const char* localUri,
299 const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
300 Int_t validityStart, Bool_t validityInfinite)
302 // write object into the CDB. Parameters are passed by Store and StoreReferenceData functions.
303 // The parameters are:
304 // 1) Uri of the main storage (Grid)
305 // 2) Uri of the backup storage (Local)
306 // 3) the object's path.
307 // 4) the object to be stored
308 // 5) the metaData to be associated with the object
309 // 6) the validity start run number w.r.t. the current run,
310 // if the data is valid only for this run leave the default 0
311 // 7) specifies if the calibration data is valid for infinity (this means until updated),
312 // typical for calibration runs, the default is kFALSE
315 // 1 if stored in main (Grid) storage
316 // 2 if stored in backup (Local) storage
// Label used only in the log messages below.
318 const char* cdbType = (mainUri == fgkMainCDB) ? "CDB" : "Reference";
// validityStart is counted backwards from the current run.
320 Int_t firstRun = GetCurrentRun() - validityStart;
322 AliError("First valid run happens to be less than 0! Setting it to 0.");
// Last valid run: infinity when requested, otherwise the current run.
327 if(validityInfinite) {
328 lastRun = AliCDBRunRange::Infinity();
330 lastRun = GetCurrentRun();
// Version and subversion are passed as -1 here.
333 AliCDBId id(path, firstRun, lastRun, -1, -1);
// Record the current run in the metadata unless a TObjString "RunUsed" property
// is already present.
335 if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){
336 TObjString runUsed = Form("%d", GetCurrentRun());
337 metaData->SetProperty("RunUsed(TObjString)", runUsed.Clone());
// First attempt: the main storage.
342 if (!(AliCDBManager::Instance()->GetStorage(mainUri))) {
343 AliError(Form("WriteToCDB - Cannot activate main %s storage", cdbType));
345 result = (UInt_t) AliCDBManager::Instance()->GetStorage(mainUri)
346 ->Put(object, id, metaData);
// Fallback: the backup storage.
351 Log(fCurrentDetector,
352 Form("WriteToCDB - Problem with main %s storage. Putting <%s> into backup storage",
353 cdbType, path.GetPath().Data()));
355 // Set Grid version to current run number, to ease retrieval later
356 id.SetVersion(GetCurrentRun());
358 result = AliCDBManager::Instance()->GetStorage(localUri)
359 ->Put(object, id, metaData);
365 Log(fCurrentDetector, "WriteToCDB - Can't store data!");
373 //______________________________________________________________________________________________
// Loads the status entry of the current detector for the current run from the
// local CDB storage into fStatusEntry and returns the contained AliShuttleStatus.
// Returns 0 when no entry is found.
374 AliShuttleStatus* AliShuttle::ReadShuttleStatus()
376 // Reads the AliShuttleStatus from the CDB
// The entry is looked up under /SHUTTLE/STATUS/<current detector>.
383 fStatusEntry = AliCDBManager::Instance()->GetStorage(GetLocalCDB())
384 ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
386 if (!fStatusEntry) return 0;
387 fStatusEntry->SetOwner(1);
// The stored object is expected to be an AliShuttleStatus; the error below is the
// visible reaction to anything else (the guard line itself is not in this listing).
389 AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
391 AliError("Invalid object stored to CDB!");
398 //______________________________________________________________________________________________
// Wraps the given status in a new CDB entry valid for the current run only,
// keeps it in fStatusEntry and writes it to the local CDB storage.
// status: the status object to store for fCurrentDetector
399 Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status)
401 // writes the status for one subdetector
408 Int_t run = GetCurrentRun();
// Id path is SHUTTLE/STATUS/<current detector>, first run == last run == current run.
410 AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
412 fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
413 fStatusEntry->SetOwner(1);
415 UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
// Error reported when the write fails (the check on result is not visible here).
418 AliError(Form("WriteShuttleStatus for %s, run %d failed", fCurrentDetector.Data(), run));
427 //______________________________________________________________________________________________
// Changes the status held in fStatusEntry, logs the transition and writes the
// entry back to the local CDB storage.
// newStatus:     the status to set
// increaseCount: when true, the status' counter is increased as well
428 void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount)
430 // changes the AliShuttleStatus for the given detector and run to the given status
// Error paths for a missing entry or an entry that does not hold an AliShuttleStatus
// (the guard lines themselves are not visible in this listing).
433 AliError("UNEXPECTED: fStatusEntry empty");
437 AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
440 AliError("UNEXPECTED: status could not be read from current CDB entry");
// The message is built before SetStatus so it names both the old and the new state;
// it goes to the SHUTTLE log and is recorded as the last action.
444 TString actionStr = Form("UpdateShuttleStatus - %s: Changing state from %s to %s",
445 fCurrentDetector.Data(),
446 status->GetStatusName(),
447 status->GetStatusName(newStatus));
448 Log("SHUTTLE", actionStr);
449 SetLastAction(actionStr);
451 status->SetStatus(newStatus);
452 if (increaseCount) status->IncreaseCount();
// Persist the modified entry.
454 AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
459 //______________________________________________________________________________________________
// Sends the status name and the retry count of the current detector through
// fMonaLisa, as the parameters "<detector>_status" and "<detector>_count".
460 void AliShuttle::SendMLInfo()
463 // sends ML information about the current status of the current detector being processed
// The status is taken from the entry currently held in fStatusEntry.
466 AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
469 AliError("UNEXPECTED: status could not be read from current CDB entry");
473 TMonaLisaText mlStatus(Form("%s_status", fCurrentDetector.Data()), status->GetStatusName());
474 TMonaLisaValue mlRetryCount(Form("%s_count", fCurrentDetector.Data()), status->GetCount());
// mlList receives the addresses of the two local objects above
// (its declaration is not visible in this listing).
477 mlList.Add(&mlStatus);
478 mlList.Add(&mlRetryCount);
480 fMonaLisa->SendParameters(&mlList);
483 //______________________________________________________________________________________________
// Decides whether the current detector (fCurrentDetector) should be processed for
// the current run, based on the configuration, the Shuttle logbook entry and the
// AliShuttleStatus stored in the local CDB.
// Visible decision steps, in order:
//   - the host must be configured to process this detector;
//   - a preprocessor must be registered for it;
//   - the logbook status must be kUnprocessed;
//   - with strict run ordering, this must be the first unprocessed run;
//   - without a stored status, a new kStarted status is written;
//   - kDone / kFailed statuses only trigger a logbook update;
//   - kStoreFailed triggers another attempt to store the objects;
//   - otherwise this is a restart, limited by fConfig->GetMaxRetries().
// NOTE(review): the embedded line numbers skip; the return statements and some guards
// are not visible in this listing and are left exactly as they are in the file.
484 Bool_t AliShuttle::ContinueProcessing()
486 // this function reads the AliShuttleStatus information from CDB and
487 // checks if the processing should be continued
488 // if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
490 if (!fConfig->HostProcessDetector(fCurrentDetector)) return kFALSE;
492 AliPreprocessor* aPreprocessor =
493 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
496 AliInfo(Form("%s: no preprocessor registered", fCurrentDetector.Data()));
500 AliShuttleLogbookEntry::Status entryStatus =
501 fLogbookEntry->GetDetectorStatus(fCurrentDetector);
503 if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
504 AliInfo(Form("ContinueProcessing - %s is %s",
505 fCurrentDetector.Data(),
506 fLogbookEntry->GetDetectorStatusName(entryStatus)));
510 // if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state
512 // check if current run is first unprocessed run for current detector
513 if (fConfig->StrictRunOrder(fCurrentDetector) &&
514 !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
// The format string has a %s conversion, so the detector name must be passed to Form();
// calling it without the argument is undefined behaviour.
516 Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering but this is not the first unprocessed run!", fCurrentDetector.Data()));
520 AliShuttleStatus* status = ReadShuttleStatus();
// No stored status: create one in state kStarted and store it.
523 Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
524 fCurrentDetector.Data()));
525 status = new AliShuttleStatus(AliShuttleStatus::kStarted);
526 return WriteShuttleStatus(status);
529 // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
530 // If it happens it may mean Logbook updating failed... let's do it now!
531 if (status->GetStatus() == AliShuttleStatus::kDone ||
532 status->GetStatus() == AliShuttleStatus::kFailed){
533 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
534 fCurrentDetector.Data(),
535 status->GetStatusName(status->GetStatus())));
536 UpdateShuttleLogbook(fCurrentDetector.Data(),
537 status->GetStatusName(status->GetStatus()));
// A previous run of this detector could not store its objects on the Grid: retry.
541 if (status->GetStatus() == AliShuttleStatus::kStoreFailed) {
543 Form("ContinueProcessing - %s: Grid storage of one or more objects failed. Trying again now",
544 fCurrentDetector.Data()));
545 if(TryToStoreAgain()){
546 Log(fCurrentDetector.Data(), "ContinueProcessing - All objects successfully stored into OCDB");
547 UpdateShuttleStatus(AliShuttleStatus::kDone);
548 UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE");
551 Form("ContinueProcessing - %s: Grid storage failed again",
552 fCurrentDetector.Data()));
553 // trigger ML information manually because we did not have a status change
559 // if we get here, there is a restart
560 Bool_t cont = kFALSE;
// Too many retries: mark the detector FAILED in the logbook and in the stored status.
563 if (status->GetCount() >= fConfig->GetMaxRetries()) {
564 Log("SHUTTLE", Form("ContinueProcessing - %s failed %d times in status %s - "
565 "Updating Shuttle Logbook", fCurrentDetector.Data(),
566 status->GetCount(), status->GetStatusName()));
567 UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
568 UpdateShuttleStatus(AliShuttleStatus::kFailed);
// Otherwise restart: back to kStarted with the retry counter increased.
570 Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. "
571 "Aborted before with %s. Retry number %d.", fCurrentDetector.Data(),
572 status->GetStatusName(), status->GetCount()));
573 UpdateShuttleStatus(AliShuttleStatus::kStarted, kTRUE);
577 // Send mail to detector expert!
578 AliInfo(Form("Sending mail to %s expert...", fCurrentDetector.Data()));
580 Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert",
581 fCurrentDetector.Data()));
586 //______________________________________________________________________________________________
// Processes one run: for every detector in the configuration that
// ContinueProcessing() accepts, the preprocessing runs in a separate client
// process while the parent monitors it and enforces a timeout. Afterwards the
// Shuttle logbook is re-read to update the run's overall state.
// entry: logbook entry of the run to process; a null pointer returns kFALSE.
// Returns hasError == kFALSE on the visible exit paths.
// NOTE(review): the embedded line numbers skip; the fork() call, the branch selecting
// parent or client and several guards/returns are not visible in this listing.
587 Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
590 // Makes data retrieval for all detectors in the configuration.
591 // entry: Shuttle logbook entry, contains run parameters and status of detectors
592 // (Unprocessed, Inactive, Failed or Done).
593 // Returns kFALSE if an error occurred and kTRUE otherwise
596 if(!entry) return kFALSE;
598 fLogbookEntry = entry;
// Run already complete according to the entry: only the logbook flag is updated.
600 if (fLogbookEntry->IsDone())
602 Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook");
603 UpdateShuttleLogbook("shuttle_done");
608 // create ML instance that monitors this run
609 fMonaLisa = new TMonaLisaWriter(Form("%d", GetCurrentRun()), "SHUTTLE", "aliendb1.cern.ch");
610 // disable monitoring of other parameters that come e.g. from TFile
611 gMonitoringWriter = 0;
613 AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^* \n",
616 // Set run type from run type logbook into current fLogbookEntry
619 // Send the information to ML
620 TMonaLisaText mlStatus("SHUTTLE_status", "Processing");
623 mlList.Add(&mlStatus);
625 fMonaLisa->SendParameters(&mlList);
627 fLogbookEntry->Print("all");
630 Bool_t hasError = kFALSE;
// Query both main storages for the current run, when they can be obtained.
632 AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
633 if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
634 AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
635 if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
637 // Loop on detectors in the configuration
638 TIter iter(fConfig->GetDetectors());
639 TObjString* aDetector = 0;
641 while ((aDetector = (TObjString*) iter.Next()))
643 fCurrentDetector = aDetector->String();
645 if (ContinueProcessing() == kFALSE) continue;
647 AliInfo(Form("\n\n \t\t\t****** run %d - %s: START ******",
648 GetCurrentRun(), aDetector->GetName()));
// Reset the "FXS called" flags of the first three systems for this detector.
650 for(Int_t iSys=0;iSys<3;iSys++) fFXSCalled[iSys]=kFALSE;
652 Log(fCurrentDetector.Data(), "Starting processing");
658 Log("SHUTTLE", "ERROR: Forking failed");
// Parent side: poll the client and enforce the preprocessor timeout.
663 AliInfo(Form("In parent process of %d - %s: Starting monitoring",
664 GetCurrentRun(), aDetector->GetName()));
666 Long_t begin = time(0);
668 int status; // to be used with waitpid, on purpose an int (not Int_t)!
// WNOHANG makes waitpid return 0 while the client is still running.
669 while (waitpid(pid, &status, WNOHANG) == 0)
671 Long_t expiredTime = time(0) - begin;
// Run time beyond the configured limit: the client is killed.
673 if (expiredTime > fConfig->GetPPTimeOut())
675 Log("SHUTTLE", Form("Process time out. Run time: %d seconds. Killing...",
682 gSystem->Sleep(1000);
// Log a progress line when the elapsed time is a multiple of 60.
686 if (expiredTime % 60 == 0)
687 Log("SHUTTLE", Form("Checked process. Run time: %d seconds.",
689 gSystem->Sleep(1000);
693 AliInfo(Form("In parent process of %d - %s: Client has terminated.",
694 GetCurrentRun(), aDetector->GetName()));
// The exit code is only read when the client exited normally.
696 if (WIFEXITED(status))
698 Int_t returnCode = WEXITSTATUS(status);
700 Log("SHUTTLE", Form("The return code is %d", returnCode));
// Client side: do the actual processing, then exit with returnCode.
709 AliInfo(Form("In client process of %d - %s", GetCurrentRun(), aDetector->GetName()));
711 UInt_t result = ProcessCurrentDetector();
713 Int_t returnCode = 0; // will be set to 1 in case of an error
718 AliInfo(Form("\n \t\t\t****** run %d - %s: PREPROCESSOR ERROR ****** \n\n",
719 GetCurrentRun(), aDetector->GetName()));
721 else if (result == 2)
723 AliInfo(Form("\n \t\t\t****** run %d - %s: STORAGE ERROR ****** \n\n",
724 GetCurrentRun(), aDetector->GetName()));
727 AliInfo(Form("\n \t\t\t****** run %d - %s: DONE ****** \n\n",
728 GetCurrentRun(), aDetector->GetName()));
733 // Process successful: Update time_processed field in FXS logbooks!
734 if (UpdateTable() == kFALSE) returnCode = 1;
// Drop the FXS lists of the systems that were used.
737 for (UInt_t iSys=0; iSys<3; iSys++)
739 if (fFXSCalled[iSys]) fFXSlist[iSys].Clear();
742 AliInfo(Form("Client process of %d - %s is exiting now with %d.",
743 GetCurrentRun(), aDetector->GetName(), returnCode));
745 // the client exits here
746 gSystem->Exit(returnCode);
748 AliError("We should never get here!!!");
752 AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^* \n",
755 //check if shuttle is done for this run, if so update logbook
756 TObjArray checkEntryArray;
757 checkEntryArray.SetOwner(1);
758 TString whereClause = Form("where run=%d", GetCurrentRun());
// A failed query or an empty result ends the function with a warning.
759 if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || checkEntryArray.GetEntries() == 0) {
760 Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!",
762 return hasError == kFALSE;
765 AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*>
766 (checkEntryArray.At(0));
770 if (checkEntry->IsDone())
772 Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook");
773 UpdateShuttleLogbook("shuttle_done");
// Detectors still unprocessed for this run lose their "first unprocessed" flag.
777 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
779 if (checkEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
781 AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
782 checkEntry->GetRun(), GetDetName(iDet)));
783 fFirstUnprocessed[iDet] = kFALSE;
789 // remove ML instance
795 return hasError == kFALSE;
798 //______________________________________________________________________________________________
// Processes fCurrentDetector for the current run: retrieves its DCS aliases and
// data points, runs the registered preprocessor on the collected values and
// records the outcome in the Shuttle status and logbook.
// NOTE(review): the embedded line numbers skip; the declarations of dcsMap, iAlias,
// iDP and aDP, the assignments to returnValue and the return statements are not
// visible in this listing.
799 UInt_t AliShuttle::ProcessCurrentDetector()
802 // Makes data retrieval just for a specific detector (fCurrentDetector).
803 // There should be a configuration for this detector.
805 AliInfo(Form("Retrieving values for %s, run %d", fCurrentDetector.Data(), GetCurrentRun()));
807 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
812 Bool_t aDCSError = kFALSE;
815 // TODO Test only... I've added a flag that allows to
816 // exclude DCS archive DB query
819 AliInfo("Skipping DCS processing!");
822 TString host(fConfig->GetDCSHost(fCurrentDetector));
823 Int_t port = fConfig->GetDCSPort(fCurrentDetector);
825 // Retrieval of Aliases
826 TObjString* anAlias = 0;
828 Int_t nTotAliases= ((TMap*)fConfig->GetDCSAliases(fCurrentDetector))->GetEntries();
829 TIter iterAliases(fConfig->GetDCSAliases(fCurrentDetector));
830 while ((anAlias = (TObjString*) iterAliases.Next()))
// One value array per alias; the array owns its values.
832 TObjArray *valueSet = new TObjArray();
833 valueSet->SetOwner(1);
// NOTE(review): iAlias is incremented only inside this AliInfo argument list, i.e.
// only when the message is printed - confirm against the full file.
835 if (((iAlias-1) % 500) == 0 || iAlias == nTotAliases)
836 AliInfo(Form("Querying DCS archive: alias %s (%d of %d)",
837 anAlias->GetName(), iAlias++, nTotAliases));
// GetValueSet returning 0 counts as a DCS error.
838 aDCSError = (GetValueSet(host, port, anAlias->String(), valueSet, kAlias) == 0);
// The map key is a clone of the alias name.
842 dcsMap.Add(anAlias->Clone(), valueSet);
844 Log(fCurrentDetector,
845 Form("ProcessCurrentDetector - Error while retrieving alias %s",
846 anAlias->GetName()));
847 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
853 // Retrieval of Data Points
856 Int_t nTotDPs= ((TMap*)fConfig->GetDCSDataPoints(fCurrentDetector))->GetEntries();
857 TIter iterDP(fConfig->GetDCSDataPoints(fCurrentDetector));
858 while ((aDP = (TObjString*) iterDP.Next()))
860 TObjArray *valueSet = new TObjArray();
861 valueSet->SetOwner(1);
// Same pattern as for the aliases, including the increment inside AliInfo.
862 if (((iDP-1) % 500) == 0 || iDP == nTotDPs)
863 AliInfo(Form("Querying DCS archive: DP %s (%d of %d)",
864 aDP->GetName(), iDP++, nTotDPs));
865 aDCSError = (GetValueSet(host, port, aDP->String(), valueSet, kDP) == 0);
869 dcsMap.Add(aDP->Clone(), valueSet);
871 Log(fCurrentDetector,
872 Form("ProcessCurrentDetector - Error while retrieving data point %s",
874 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
881 // DCS Archive DB processing successful. Call Preprocessor!
882 UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
884 AliPreprocessor* aPreprocessor =
885 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
// The preprocessor is initialised with the run number and its start/end times and
// then receives the map of retrieved DCS values.
887 aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
888 UInt_t aPPResult = aPreprocessor->Process(&dcsMap);
// Three outcomes: preprocessor error, full success, or success with a Grid
// storage error.
890 UInt_t returnValue = 0;
891 if (aPPResult == 0) { // Preprocessor error
892 UpdateShuttleStatus(AliShuttleStatus::kPPError);
894 } else if (fGridError == kFALSE) { // process and Grid storage ok!
895 UpdateShuttleStatus(AliShuttleStatus::kDone);
896 UpdateShuttleLogbook(fCurrentDetector, "DONE");
897 Log(fCurrentDetector.Data(),
898 "ProcessCurrentDetector - Preprocessor and Grid storage ended successfully");
900 } else { // Grid storage error (process ok, but object put in local storage)
901 UpdateShuttleStatus(AliShuttleStatus::kStoreFailed);
910 //______________________________________________________________________________________________
// Queries the Shuttle logbook table through fServer[3] and appends one
// AliShuttleLogbookEntry per returned row to `entries`.
// whereClause: SQL text inserted verbatim after the table name in the query
// NOTE(review): the second line of the signature (embedded line 912) and the
// declarations of sqlQuery and aRow are not visible in this listing.
911 Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
914 // Queries DAQ's Shuttle logbook and fills the detector status object.
915 // Calls QueryRunParameters to query the DAQ logbook for run parameters.
919 // check connection, in case connect
920 if(!Connect(3)) return kFALSE;
// Rows are requested in ascending run order.
923 sqlQuery = Form("select * from %s %s order by run", fConfig->GetShuttlelbTable(), whereClause);
925 TSQLResult* aResult = fServer[3]->Query(sqlQuery);
927 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
931 AliDebug(2,Form("Query = %s", sqlQuery.Data()));
933 if(aResult->GetRowCount() == 0) {
934 // if(sqlQuery.EndsWith("where shuttle_done=0 order by run")){
935 // Log("SHUTTLE", "QueryShuttleLogbook - All runs in Shuttle Logbook are already DONE");
939 AliInfo("No entries in Shuttle Logbook match request");
945 // TODO Check field count!
// The result is required to have exactly 22 fields.
946 const UInt_t nCols = 22;
947 if (aResult->GetFieldCount() != (Int_t) nCols) {
948 AliError("Invalid SQL result field number!");
954 while ((aRow = aResult->Next())) {
// Field 0 holds the run number.
955 TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
956 Int_t run = runString.Atoi();
958 AliShuttleLogbookEntry *entry = QueryRunParameters(run);
// Every column (name, value) of the row is handed to SetDetectorStatus.
963 for(UInt_t ii = 0; ii < nCols; ii++)
964 entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii));
966 entries.AddLast(entry);
970 // if(sqlQuery.EndsWith("where shuttle_done=0 order by run"))
971 // Log("SHUTTLE", Form("QueryShuttleLogbook - Found %d unprocessed runs in Shuttle Logbook",
972 // entries.GetEntriesFast()));
977 //______________________________________________________________________________________________
// Reads the DAQ logbook row of the given run through fServer[3] and builds a new
// AliShuttleLogbookEntry holding every field of that row as a run parameter.
// run: the run number to look up
// NOTE(review): the embedded line numbers skip; the connection check, the return
// statements and the cleanup on the error paths are not visible in this listing.
978 AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
981 // Retrieves the run parameters written in the DAQ logbook and sets them into an AliShuttleLogbookEntry object
984 // check connection, in case connect
989 sqlQuery.Form("select * from %s where run=%d", fConfig->GetDAQlbTable(), run);
991 TSQLResult* aResult = fServer[3]->Query(sqlQuery);
993 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
// Exactly one row is expected for the run: zero rows and more than one row are
// both reported as "Skipping".
997 if (aResult->GetRowCount() == 0) {
998 Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run));
1003 if (aResult->GetRowCount() > 1) {
1004 AliError(Form("More than one entry in DAQ Logbook for run %d. Skipping", run));
1009 TSQLRow* aRow = aResult->Next();
1012 AliError(Form("Could not retrieve row for run %d. Skipping", run));
1017 AliShuttleLogbookEntry* entry = new AliShuttleLogbookEntry(run);
// Copy every (field name, value) pair of the row into the entry.
1019 for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
1020 entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
1022 UInt_t startTime = entry->GetStartTime();
1023 UInt_t endTime = entry->GetEndTime();
// Both times must be non-zero and start must not be after end.
// NOTE(review): the message below prints the UInt_t times with %d.
1025 if (!startTime || !endTime || startTime > endTime) {
1027 Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d",
1028 run, startTime, endTime));
1041 //______________________________________________________________________________________________
// Retries the storage for both the main CDB and the main reference storage.
// Both attempts are always made; the result is kTRUE only when both succeed.
1042 Bool_t AliShuttle::TryToStoreAgain()
1044 // Called in case the detector failed to store the object in Grid OCDB
1045 // It tries to store the object again, if it does not find more recent and overlapping objects
1046 // Calls the underlying TryToStoreAgain(TString&) function twice, for OCDB and Reference storage.
1048 AliInfo("Trying to store OCDB data again...");
1049 Bool_t resultCDB = TryToStoreAgain(fgkMainCDB);
1051 AliInfo("Trying to store reference data again...");
1052 Bool_t resultRef = TryToStoreAgain(fgkMainRefStorage);
1054 return resultCDB && resultRef;
1057 //______________________________________________________________________________________________
// Retries the Grid storage of the objects that the current detector left in the
// matching local backup storage for the current run.
// gridURI: fgkMainCDB or fgkMainRefStorage; any other value is reported as an
//          invalid storage URI.
// NOTE(review): the embedded line numbers skip; the declarations of backupURI and
// filename, the assignments to type, store and result, and the return statements are
// not visible in this listing.
1058 Bool_t AliShuttle::TryToStoreAgain(TString& gridURI)
1060 // Called by TryToStoreAgain(), performs actual storage retry
1062 TObjArray* gridIds=0;
1064 Bool_t result = kTRUE;
1066 const char* type = 0;
// Select the backup storage that belongs to the requested Grid storage.
1068 if(gridURI == fgkMainCDB) {
1070 backupURI = fgkLocalCDB;
1071 } else if(gridURI == fgkMainRefStorage) {
1073 backupURI = fgkLocalRefStorage;
1075 AliError(Form("Invalid storage URI: %s", gridURI.Data()));
1079 AliCDBManager* man = AliCDBManager::Instance();
1081 AliCDBStorage *gridSto = man->GetStorage(gridURI);
1083 Log(fCurrentDetector.Data(),
1084 Form("TryToStoreAgain - cannot activate main %s storage", type));
// Ids known to the Grid storage, as returned by GetQueryCDBList().
1088 gridIds = gridSto->GetQueryCDBList();
1090 // get objects previously stored in local CDB
1091 AliCDBStorage *backupSto = man->GetStorage(backupURI);
// Path pattern: <offline detector name>/*/*
1092 AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*");
1093 // Local objects were stored with current run as Grid version!
1094 TList* localEntries = backupSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun());
1095 localEntries->SetOwner(1);
1097 // loop on local stored objects
1098 TIter localIter(localEntries);
1099 AliCDBEntry *aLocEntry = 0;
1100 while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){
1101 aLocEntry->SetOwner(1);
// aLocId is a copy taken before the entry's version fields are reset to -1, so it
// still carries the values the object was stored with locally.
1102 AliCDBId aLocId = aLocEntry->GetId();
1103 aLocEntry->SetVersion(-1);
1104 aLocEntry->SetSubVersion(-1);
1106 // loop on Grid valid Id's
1107 Bool_t store = kTRUE;
1108 TIter gridIter(gridIds);
1109 AliCDBId* aGridId = 0;
1110 while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
1111 // If local object is valid up to infinity we store it only if it is
1112 // the first unprocessed run!
1113 if (aLocId.GetLastRun() == AliCDBRunRange::Infinity())
1115 if (!fFirstUnprocessed[GetDetPos(fCurrentDetector)])
1117 Log(fCurrentDetector.Data(),
1118 ("TryToStoreAgain - This object has validity infinite but "
1119 "there are previous unprocessed runs!"));
// Only Grid ids with the same path are relevant for this local object.
1125 if(aGridId->GetPath() != aLocId.GetPath()) continue;
1126 // skip all objects valid up to infinity
1127 if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
1128 // if we get here, it means there's already some more recent object stored on Grid!
1134 Log(fCurrentDetector.Data(),
1135 Form("TryToStoreAgain - A more recent object already exists in %s storage: <%s>",
1136 type, aGridId->ToString().Data()));
1137 // removing local filename...
1138 // TODO maybe it's better not to remove it, it was not copied to the Grid!
1140 backupSto->IdToFilename(aLocId, filename);
1141 AliInfo(Form("Removing local file %s", filename.Data()));
// NOTE(review): the file is removed through a shell command built from the filename.
1142 gSystem->Exec(Form("rm %s",filename.Data()));
1146 // If we get here, the file can be stored!
1147 Bool_t storeOk = gridSto->Put(aLocEntry);
1149 Log(fCurrentDetector.Data(),
1150 Form("TryToStoreAgain - Object <%s> successfully put into %s storage",
1151 aLocId.ToString().Data(), type));
1153 // removing local filename...
1155 backupSto->IdToFilename(aLocId, filename);
1156 AliInfo(Form("Removing local file %s", filename.Data()));
1157 gSystem->Exec(Form("rm %s", filename.Data()));
1160 Log(fCurrentDetector.Data(),
1161 Form("TryToStoreAgain - Grid %s storage of object <%s> failed again",
1162 type, aLocId.ToString().Data()));
1166 localEntries->Clear();
1171 //______________________________________________________________________________________________
// Fetch the values of one DCS alias or data point for the current run.
// An AliDCSClient is built from host/port together with fTimeout and fRetries;
// the query window is GetCurrentStartTime() .. GetCurrentEndTime() and the
// retrieved values are delivered through valueSet.
// NOTE(review): the declaration of `result`, the conditions selecting the
// alias/data-point call and the return statements are not visible in this
// extract; presumably every failure path returns kFALSE - confirm.
1172 Bool_t AliShuttle::GetValueSet(const char* host, Int_t port, const char* entry,
1173 TObjArray* valueSet, DCSType type)
1175 // Retrieve all "entry" data points from the DCS server
1176 // host, port: TSocket connection parameters
1177 // entry: name of the alias or data point
1178 // valueSet: array of retrieved AliDCSValue's
1179 // type: kAlias or kDP
1181 AliDCSClient client(host, port, fTimeout, fRetries);
// Nothing can be queried without a live connection.
1182 if (!client.IsConnected())
// Alias query (presumably taken when type == kAlias).
1191 result = client.GetAliasValues(entry,
1192 GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
// Data point query (presumably taken when type == kDP).
1196 result = client.GetDPValues(entry,
1197 GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
// Error reporting: the client error code is translated to text and logged
// under the current detector.
1202 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get '%s'! Reason: %s",
1203 entry, AliDCSClient::GetErrorString(result)));
// A server-side error carries an additional message from the client.
1205 if (result == AliDCSClient::fgkServerError)
1207 Log(fCurrentDetector.Data(), Form("GetValueSet - Server error: %s",
1208 client.GetServerError().Data()));
1217 //______________________________________________________________________________________________
// Locate a calibration file in the FXS database of `system`, copy it to the
// Shuttle temp directory and return the full local path.
// Steps visible below: connect, build and run the SQL lookup, validate the
// result (row count, field count), copy the file with up to maxRetries
// attempts via RetrieveFile(), optionally compare the md5 checksum, record
// the (id, source) pair in fFXSlist[system] and set fFXSCalled[system].
// The returned pointer refers to a function-local static TString, so it is
// overwritten by the next call to GetFile().
// NOTE(review): most log messages below are prefixed "GetFileName" although
// the function is named GetFile - confirm whether that is intended.
1218 const char* AliShuttle::GetFile(Int_t system, const char* detector,
1219 const char* id, const char* source)
1221 // Get calibration file from file exchange servers
1222 // First queries the FXS database for the file name, using the run, detector, id and source info
1223 // then calls RetrieveFile(filename) for actual copy to local disk
1224 // run: current run being processed (given by Logbook entry fLogbookEntry)
1225 // detector: the Preprocessor name
1226 // id: provided as a parameter by the Preprocessor
1227 // source: provided by the Preprocessor through GetFileSources function
1229 // check connection, in case connect
1230 if (!Connect(system))
1232 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
1236 // Query preparation
1237 TString sourceName(source);
1239 TString sqlQueryStart = Form("select filePath,size,fileChecksum from %s where",
1240 fConfig->GetFXSdbTable(system));
1241 TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1242 GetCurrentRun(), detector, id);
// The source column differs per system: DAQsource here, DDLnumbers for HLT.
// NOTE(review): the body of the kDCS branch is not visible in this extract.
1246 whereClause += Form(" and DAQsource=\"%s\"", source);
1248 else if (system == kDCS)
1252 else if (system == kHLT)
1254 whereClause += Form(" and DDLnumbers=\"%s\"", source);
1258 TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1260 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1263 TSQLResult* aResult = 0;
1264 aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
// Query failure report.
1266 Log(detector, Form("GetFileName - Can't execute SQL query to %s database for: id = %s, source = %s",
1267 GetSystemName(system), id, sourceName.Data()));
// The lookup is expected to match exactly one row: zero rows and more than
// one row are both reported.
1271 if(aResult->GetRowCount() == 0)
1274 Form("GetFileName - No entry in %s FXS db for: id = %s, source = %s",
1275 GetSystemName(system), id, sourceName.Data()));
1280 if (aResult->GetRowCount() > 1) {
1282 Form("GetFileName - More than one entry in %s FXS db for: id = %s, source = %s",
1283 GetSystemName(system), id, sourceName.Data()));
// nFields is declared outside the visible lines; the select above requests
// three columns (filePath, size, fileChecksum).
1288 if (aResult->GetFieldCount() != nFields) {
1290 Form("GetFileName - Wrong field count in %s FXS db for: id = %s, source = %s",
1291 GetSystemName(system), id, sourceName.Data()));
1296 TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
1299 Log(detector, Form("GetFileName - Empty set result in %s FXS db from query: id = %s, source = %s",
1300 GetSystemName(system), id, sourceName.Data()));
// Column order follows the select list: 0 = filePath, 1 = size, 2 = fileChecksum.
1305 TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
1306 TString fileSize(aRow->GetField(1), aRow->GetFieldLength(1));
1307 TString fileChecksum(aRow->GetField(2), aRow->GetFieldLength(2));
1312 AliDebug(2, Form("filePath = %s; size = %s, fileChecksum = %s",
1313 filePath.Data(), fileSize.Data(), fileChecksum.Data()));
1315 // retrieved file is renamed to make it unique
1316 TString localFileName = Form("%s_%s_%d_%s_%s.shuttle",
1317 GetSystemName(system), detector, GetCurrentRun(), id, sourceName.Data());
1320 // file retrieval from FXS
1321 UInt_t nRetries = 0;
1322 UInt_t maxRetries = 3;
1323 Bool_t result = kFALSE;
1325 // copy!! if successful TSystem::Exec returns 0
// nRetries is incremented in the loop condition, so the debug output below
// counts attempts starting from 1.
1326 while(nRetries++ < maxRetries) {
1327 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
1328 result = RetrieveFile(system, filePath.Data(), localFileName.Data());
1331 Log(detector, Form("GetFileName - Copy of file %s from %s FXS failed",
1332 filePath.Data(), GetSystemName(system)));
1335 AliInfo(Form("File %s copied from %s FXS into %s/%s",
1336 filePath.Data(), GetSystemName(system),
1337 GetShuttleTempDir(), localFileName.Data()));
// The checksum comparison only runs when the database provides one.
1340 if (fileChecksum.Length()>0)
1342 // compare md5sum of local file with the one stored in the FXS DB
// grep's exit status tells whether the expected checksum occurs in the
// md5sum output.
// NOTE(review): "2>&1 > /dev/null" only discards stdout; stderr still goes
// to the original stdout - confirm whether "> /dev/null 2>&1" was intended.
1343 Int_t md5Comp = gSystem->Exec(Form("md5sum %s/%s |grep %s 2>&1 > /dev/null",
1344 GetShuttleTempDir(), localFileName.Data(), fileChecksum.Data()));
1348 Log(detector, Form("GetFileName - md5sum of file %s does not match with local copy!",
// NOTE(review): this message is logged under fCurrentDetector while the rest
// of the function logs under the `detector` argument - confirm.
1354 Log(fCurrentDetector, Form("GetFile - md5sum of file %s not set in %s database, skipping comparison",
1355 filePath.Data(), GetSystemName(system)));
1360 if(!result) return 0;
// Remember that this FXS was used and which (id, source) pair was fetched;
// UpdateTable() splits these entries again on the "#!?!#" separator.
1362 fFXSCalled[system]=kTRUE;
1363 TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, sourceName.Data()));
1364 fFXSlist[system].Add(fileParams);
// Static storage keeps the returned C string valid after the function
// returns, at the price of being shared between calls.
1366 static TString fullLocalFileName;
1367 fullLocalFileName = TString::Format("%s/%s", GetShuttleTempDir(), localFileName.Data());
1369 AliInfo(Form("fullLocalFileName = %s", fullLocalFileName.Data()));
1371 return fullLocalFileName.Data();
1375 //______________________________________________________________________________________________
1376 Bool_t AliShuttle::RetrieveFile(UInt_t system, const char* fxsFileName, const char* localFileName)
1378 // Copies file from FXS to local Shuttle machine
1380 // check temp directory: trying to cd to temp; if it does not exist, create it
1381 AliDebug(2, Form("Copy file %s from %s FXS into %s/%s",
1382 GetSystemName(system), fxsFileName, GetShuttleTempDir(), localFileName));
1384 void* dir = gSystem->OpenDirectory(GetShuttleTempDir());
1386 if (gSystem->mkdir(GetShuttleTempDir(), kTRUE)) {
1387 AliError(Form("Can't open directory <%s>", GetShuttleTempDir()));
1392 gSystem->FreeDirectory(dir);
1395 TString baseFXSFolder;
1398 baseFXSFolder = "FES/";
1400 else if (system == kDCS)
1404 else if (system == kHLT)
1406 baseFXSFolder = "~/";
1410 TString command = Form("scp -oPort=%d -2 %s@%s:%s%s %s/%s",
1411 fConfig->GetFXSPort(system),
1412 fConfig->GetFXSUser(system),
1413 fConfig->GetFXSHost(system),
1414 baseFXSFolder.Data(),
1416 GetShuttleTempDir(),
1419 AliDebug(2, Form("%s",command.Data()));
1421 Bool_t result = (gSystem->Exec(command.Data()) == 0);
1426 //______________________________________________________________________________________________
// List the sources that produced file `id` for `detector` in the current run.
// The FXS table of `system` is queried for the source column ("DAQsource" or
// "DDLnumbers") and each returned value is added to a newly allocated TList
// as a TObjString.
// NOTE(review): the return statements and any SetOwner() call are not visible
// in this extract; presumably the caller takes ownership of the list - confirm.
1427 TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
1429 // Get sources producing the condition file Id from file exchange servers
// DCS is rejected here: it has a single source, so there is nothing to list.
1433 AliError("DCS system has only one source of data!");
1438 // check connection, in case connect
1439 if (!Connect(system))
// NOTE(review): the message says "GetFile" although this is GetFileSources.
1441 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
// Name of the table column holding the source for this system.
1445 TString sourceName = 0;
1448 sourceName = "DAQsource";
1449 } else if (system == kHLT)
1451 sourceName = "DDLnumbers";
1454 TString sqlQueryStart = Form("select %s from %s where", sourceName.Data(), fConfig->GetFXSdbTable(system));
1455 TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1456 GetCurrentRun(), detector, id);
1457 TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1459 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1462 TSQLResult* aResult;
1463 aResult = fServer[system]->Query(sqlQuery);
// Query failure report.
1465 Log(detector, Form("GetFileSources - Can't execute SQL query to %s database for id: %s",
1466 GetSystemName(system), id));
// No matching row: reported in the log.
1470 if (aResult->GetRowCount() == 0)
1473 Form("GetFileSources - No entry in %s FXS table for id: %s", GetSystemName(system), id));
1479 TList *list = new TList();
// One list entry per result row; field 0 is the selected source column.
1482 while ((aRow = aResult->Next()))
1485 TString source(aRow->GetField(0), aRow->GetFieldLength(0));
1486 AliDebug(2, Form("%s = %s", sourceName.Data(), source.Data()));
1487 list->Add(new TObjString(source));
1496 //______________________________________________________________________________________________
1497 Bool_t AliShuttle::Connect(Int_t system)
1499 // Connect to MySQL Server of the system's FXS MySQL databases
1500 // DAQ Logbook, Shuttle Logbook and DAQ FXS db are on the same host
1502 // check connection: if already connected return
1503 if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
1505 TString dbHost, dbUser, dbPass, dbName;
1507 if (system < 3) // FXS db servers
1509 dbHost = Form("mysql://%s:%d", fConfig->GetFXSdbHost(system), fConfig->GetFXSdbPort(system));
1510 dbUser = fConfig->GetFXSdbUser(system);
1511 dbPass = fConfig->GetFXSdbPass(system);
1512 dbName = fConfig->GetFXSdbName(system);
1513 } else { // Run & Shuttle logbook servers
1514 // TODO Will the Shuttle logbook server be the same as the Run logbook server ???
1515 dbHost = Form("mysql://%s:%d", fConfig->GetDAQlbHost(), fConfig->GetDAQlbPort());
1516 dbUser = fConfig->GetDAQlbUser();
1517 dbPass = fConfig->GetDAQlbPass();
1518 dbName = fConfig->GetDAQlbDB();
1521 fServer[system] = TSQLServer::Connect(dbHost.Data(), dbUser.Data(), dbPass.Data());
1522 if (!fServer[system] || !fServer[system]->IsConnected()) {
1525 AliError(Form("Can't establish connection to FXS database for %s",
1526 AliShuttleInterface::GetSystemName(system)));
1528 AliError("Can't establish connection to Run logbook.");
1530 if(fServer[system]) delete fServer[system];
1535 TSQLResult* aResult=0;
1538 aResult = fServer[kDAQ]->GetTables(dbName.Data());
1541 aResult = fServer[kDCS]->GetTables(dbName.Data());
1544 aResult = fServer[kHLT]->GetTables(dbName.Data());
1547 aResult = fServer[3]->GetTables(dbName.Data());
1555 //______________________________________________________________________________________________
// Stamp the FXS table rows of every file fetched for the current run and
// detector: for each of the three FXS systems that was used (fFXSCalled),
// every entry of fFXSlist[system] is split into file id and source and an
// "update ... set time_processed=<now>" query is issued for the matching row.
// NOTE(review): the statements that update `result` on failure and the final
// return are not visible in this extract.
1556 Bool_t AliShuttle::UpdateTable()
1558 // Update FXS table filling time_processed field in all rows corresponding to current run and detector
1560 Bool_t result = kTRUE;
1562 for (UInt_t system=0; system<3; system++)
// Systems whose FXS was never queried have nothing to update.
1564 if(!fFXSCalled[system]) continue;
1566 // check connection, in case connect
1567 if (!Connect(system))
1569 Log(fCurrentDetector, Form("UpdateTable - Couldn't connect to %s FXS database", GetSystemName(system)));
// One timestamp per system, shared by all rows updated below.
1574 TTimeStamp now; // now
1576 // Loop on FXS list entries
1577 TIter iter(&fFXSlist[system]);
1578 TObjString *aFXSentry=0;
1579 while ((aFXSentry = dynamic_cast<TObjString*> (iter.Next())))
1581 TString aFXSentrystr = aFXSentry->String();
// Entries were stored by GetFile() as "<id>#!?!#<source>".
// NOTE(review): TString::Tokenize treats its argument as a set of delimiter
// characters, so an id or source containing '#', '!' or '?' would yield
// more than two tokens - confirm against the ROOT documentation.
1582 TObjArray *aFXSarray = aFXSentrystr.Tokenize("#!?!#");
1583 if (!aFXSarray || aFXSarray->GetEntries() != 2 )
1585 Log(fCurrentDetector, Form("UpdateTable - error updating %s FXS entry. Check string: <%s>",
1586 GetSystemName(system), aFXSentrystr.Data()));
1587 if(aFXSarray) delete aFXSarray;
// The two pointers below refer to strings owned by aFXSarray.
1591 const char* fileId = ((TObjString*) aFXSarray->At(0))->GetName();
1592 const char* source = ((TObjString*) aFXSarray->At(1))->GetName();
// The row filter depends on the system: the source is matched through
// DAQsource or DDLnumbers, while the DCS clause does not use it.
1594 TString whereClause;
1597 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
1598 GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
1600 else if (system == kDCS)
1602 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\";",
1603 GetCurrentRun(), fCurrentDetector.Data(), fileId);
1605 else if (system == kHLT)
1607 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\";",
1608 GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
// NOTE(review): now.GetSec() is formatted with %d; if TTimeStamp::GetSec()
// returns time_t (presumably 64-bit on current platforms) the specifier does
// not match the argument type - confirm and cast or change the format.
1613 TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
1614 now.GetSec(), whereClause.Data());
1616 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1619 TSQLResult* aResult;
1620 aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
// Query failure report.
1623 Log(fCurrentDetector, Form("UpdateTable - %s db: can't execute SQL query <%s>",
1624 GetSystemName(system), sqlQuery.Data()));
1635 //______________________________________________________________________________________________
// Write the processing status of the current run into the Shuttle logbook.
// With detector == "shuttle_done" the shuttle_done column is set to 1 and a
// "Done" status is sent to MonaLisa; otherwise the column named after
// `detector` is set to `status`, which must contain "done" or "failed"
// (case-insensitive). The update is restricted to the row of GetCurrentRun().
// NOTE(review): the connection check, the declarations of setClause/mlList
// and all return statements are not visible in this extract.
1636 Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
1639 // Update Shuttle logbook filling detector or shuttle_done column
1640 // ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
1643 // check connection, in case connect
1645 Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook.");
1649 TString detName(detector);
// Special column name marking the whole run as processed.
1651 if(detName == "shuttle_done")
1653 setClause = "set shuttle_done=1";
1655 // Send the information to ML
1656 TMonaLisaText mlStatus("SHUTTLE_status", "Done");
// mlStatus is a local object; its address is handed to the list only for the
// SendParameters() call below.
1659 mlList.Add(&mlStatus);
1661 fMonaLisa->SendParameters(&mlList);
// Per-detector update: only statuses containing "done" or "failed" are accepted.
1663 TString statusStr(status);
1664 if(statusStr.Contains("done", TString::kIgnoreCase) ||
1665 statusStr.Contains("failed", TString::kIgnoreCase)){
1666 setClause = Form("set %s=\"%s\"", detector, status);
1669 Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s",
1675 TString whereClause = Form("where run=%d", GetCurrentRun());
1677 TString sqlQuery = Form("update %s %s %s",
1678 fConfig->GetShuttlelbTable(), setClause.Data(), whereClause.Data());
1680 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
// fServer[3] is the slot Connect() fills with the Run/Shuttle logbook server.
1683 TSQLResult* aResult;
1684 aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
// Query failure report.
1686 Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data()));
1694 //______________________________________________________________________________________________
1695 Int_t AliShuttle::GetCurrentRun() const
1697 // Get current run from logbook entry
1699 return fLogbookEntry ? fLogbookEntry->GetRun() : -1;
1702 //______________________________________________________________________________________________
1703 UInt_t AliShuttle::GetCurrentStartTime() const
1705 // get current start time
1707 return fLogbookEntry ? fLogbookEntry->GetStartTime() : 0;
1710 //______________________________________________________________________________________________
1711 UInt_t AliShuttle::GetCurrentEndTime() const
1713 // get current end time from logbook entry
1715 return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
1718 //______________________________________________________________________________________________
1719 void AliShuttle::Log(const char* detector, const char* message)
1721 // Fill log string with a message
1723 void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
1725 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE)) {
1726 AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
1731 gSystem->FreeDirectory(dir);
1734 TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
1735 if (GetCurrentRun() >= 0)
1736 toLog += Form("run %d - ", GetCurrentRun());
1737 toLog += Form("%s", message);
1739 AliInfo(toLog.Data());
1742 if (GetCurrentRun() >= 0)
1743 fileName.Form("%s/%s_%d.log", GetShuttleLogDir(), detector, GetCurrentRun());
1745 fileName.Form("%s/%s.log", GetShuttleLogDir(), detector);
1747 gSystem->ExpandPathName(fileName);
1750 logFile.open(fileName, ofstream::out | ofstream::app);
1752 if (!logFile.is_open()) {
1753 AliError(Form("Could not open file %s", fileName.Data()));
1757 logFile << toLog.Data() << "\n";
1762 //______________________________________________________________________________________________
// Entry point for processing runs: queries the Shuttle logbook for entries
// with shuttle_done=0 (optionally restricted to `run`), initialises the
// fFirstUnprocessed flags, and hands the entries to RetrieveConditionsData().
// NOTE(review): the conditions on `run` that choose between the paired log
// messages and query variants, and the return statements, are not visible in
// this extract; the header comment below states that run = -1 means "all
// unprocessed runs".
1763 Bool_t AliShuttle::Collect(Int_t run)
1766 // Collects conditions data for all UNPROCESSED run written to DAQ LogBook in case of run = -1 (default)
1767 // If a dedicated run is given this run is processed
1769 // In operational mode, this is the Shuttle function triggered by the EOR signal.
1773 Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs");
1775 Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run));
// Publish the new state for the monitoring getters.
1777 SetLastAction("Starting");
// Base selection: runs not yet marked as done; narrowed to one run below.
1779 TString whereClause("where shuttle_done=0");
1781 whereClause += Form(" and run=%d", run);
1783 TObjArray shuttleLogbookEntries;
1784 if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries))
1786 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
// Nothing selected: the message depends on whether a specific run was requested.
1790 if (shuttleLogbookEntries.GetEntries() == 0)
1793 Log("SHUTTLE","Collect - Found no UNPROCESSED runs in Shuttle logbook");
1795 Log("SHUTTLE", Form("Collect - Run %d is already DONE "
1796 "or it does not exist in Shuttle logbook", run));
// Start from "first unprocessed" for every detector; the flags are cleared
// below for detectors that still have an earlier unprocessed run.
1800 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1801 fFirstUnprocessed[iDet] = kTRUE;
1805 // query Shuttle logbook for earlier runs, check if some detectors are unprocessed,
1806 // flag them into fFirstUnprocessed array
1807 TString whereClause(Form("where shuttle_done=0 and run < %d", run));
1808 TObjArray tmpLogbookEntries;
1809 if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries))
1811 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
1815 TIter iter(&tmpLogbookEntries);
1816 AliShuttleLogbookEntry* anEntry = 0;
1817 while ((anEntry = dynamic_cast<AliShuttleLogbookEntry*> (iter.Next())))
1819 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1821 if (anEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
1823 AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
1824 anEntry->GetRun(), GetDetName(iDet)));
1825 fFirstUnprocessed[iDet] = kFALSE;
// Process the selected entries; the outcome is reported in the log.
1833 if (!RetrieveConditionsData(shuttleLogbookEntries))
1835 Log("SHUTTLE", "Collect - Process of at least one run failed");
1839 Log("SHUTTLE", "Collect - Requested run(s) successfully processed");
1843 //______________________________________________________________________________________________
1844 Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries)
1846 // Retrieve conditions data for all runs that aren't processed yet
1848 Bool_t hasError = kFALSE;
1850 TIter iter(&dateEntries);
1851 AliShuttleLogbookEntry* anEntry;
1853 while ((anEntry = (AliShuttleLogbookEntry*) iter.Next())){
1854 if (!Process(anEntry)){
1858 // clean SHUTTLE temp directory
1859 TString command = Form("rm -f %s/*.shuttle", GetShuttleTempDir());
1860 gSystem->Exec(command.Data());
1863 return hasError == kFALSE;
1866 //______________________________________________________________________________________________
// Monitoring getter: reads fLastActionTime while holding fMonitoringMutex.
// NOTE(review): the declaration of `tmp` and the return statement are not
// visible in this extract; presumably the copied value is returned - confirm.
1867 ULong_t AliShuttle::GetTimeOfLastAction() const
1871 fMonitoringMutex->Lock();
// Copy taken under the lock so the value can be used after unlocking.
1873 tmp = fLastActionTime;
1875 fMonitoringMutex->UnLock();
1880 //______________________________________________________________________________________________
// Monitoring getter for the last action description; the access is bracketed
// by fMonitoringMutex Lock()/UnLock().
// NOTE(review): the statements between the lock calls and the return are not
// visible in this extract; presumably a copy of fLastAction is taken under
// the lock and returned by value - confirm.
1881 const TString AliShuttle::GetLastAction() const
1883 // returns a string description of the last action
1887 fMonitoringMutex->Lock();
1891 fMonitoringMutex->UnLock();
1896 //______________________________________________________________________________________________
1897 void AliShuttle::SetLastAction(const char* action)
1899 // updates the monitoring variables
1901 fMonitoringMutex->Lock();
1903 fLastAction = action;
1904 fLastActionTime = time(0);
1906 fMonitoringMutex->UnLock();
1909 //______________________________________________________________________________________________
1910 const char* AliShuttle::GetRunParameter(const char* param)
1912 // returns run parameter read from DAQ logbook
1914 if(!fLogbookEntry) {
1915 AliError("No logbook entry!");
1919 return fLogbookEntry->GetRunParameter(param);
1922 //______________________________________________________________________________________________
1923 AliCDBEntry* AliShuttle::GetFromOCDB(const AliCDBPath& path)
1925 // returns obiect from OCDB valid for current run
1927 AliCDBStorage *sto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
1930 Log("SHUTTLE", "GetFromOCDB - Cannot activate main OCDB for query!");
1934 return dynamic_cast<AliCDBEntry*> (sto->Get(path, GetCurrentRun()));
1937 //______________________________________________________________________________________________
1938 Bool_t AliShuttle::SendMail()
1940 // sends a mail to the subdetector expert in case of preprocessor error
1942 void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
1945 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
1947 AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
1952 gSystem->FreeDirectory(dir);
1955 TString bodyFileName;
1956 bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
1957 gSystem->ExpandPathName(bodyFileName);
1960 mailBody.open(bodyFileName, ofstream::out);
1962 if (!mailBody.is_open())
1964 AliError(Form("Could not open mail body file %s", bodyFileName.Data()));
1969 TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
1970 TObjString *anExpert=0;
1971 while ((anExpert = (TObjString*) iterExperts.Next()))
1973 to += Form("%s,", anExpert->GetName());
1975 to.Remove(to.Length()-1);
1976 AliDebug(2, Form("to: %s",to.Data()));
1978 // TODO this will be removed...
1979 if (to.Contains("not_yet_set")) {
1980 AliInfo("List of detector responsibles not yet set!");
1984 TString cc="alberto.colla@cern.ch";
1986 TString subject = Form("%s Shuttle preprocessor error in run %d !",
1987 fCurrentDetector.Data(), GetCurrentRun());
1988 AliDebug(2, Form("subject: %s", subject.Data()));
1990 TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
1991 body += Form("SHUTTLE just detected that your preprocessor "
1992 "exited with ERROR state in run %d!!\n\n", GetCurrentRun());
1993 body += Form("Please check %s status on the web page asap!\n\n", fCurrentDetector.Data());
1994 body += Form("The last 10 lines of %s log file are following:\n\n");
1996 AliDebug(2, Form("Body begin: %s", body.Data()));
1998 mailBody << body.Data();
2000 mailBody.open(bodyFileName, ofstream::out | ofstream::app);
2002 TString logFileName = Form("%s/%s_%d.log", GetShuttleLogDir(), fCurrentDetector.Data(), GetCurrentRun());
2003 TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
2004 if (gSystem->Exec(tailCommand.Data()))
2006 mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
2009 TString endBody = Form("------------------------------------------------------\n\n");
2010 endBody += Form("In case of problems please contact the SHUTTLE core team.\n\n");
2011 endBody += "Please do not answer this message directly, it is automatically generated.\n\n";
2012 endBody += "Sincerely yours,\n\n \t\t\tthe SHUTTLE\n";
2014 AliDebug(2, Form("Body end: %s", endBody.Data()));
2016 mailBody << endBody.Data();
2021 TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
2025 bodyFileName.Data());
2026 AliDebug(2, Form("mail command: %s", mailCommand.Data()));
2028 Bool_t result = gSystem->Exec(mailCommand.Data());
2033 //______________________________________________________________________________________________
// Reads the (detector, run_type) rows of the current run from the run type
// logbook table and stores each pair in fLogbookEntry via SetRunType().
// NOTE(review): the log messages are prefixed "GetRunType" although this is
// SetRunType; the connection check, the guard around the query failure
// message and any clean-up of the result set are not visible in this extract.
2034 void AliShuttle::SetRunType()
2036 // Gets run type from logbook and fills current Shuttle logbook entry
2038 // check connection, in case connect
2040 Log("SHUTTLE", "GetRunType - Couldn't connect to DAQ Logbook.");
2044 TString sqlQuery = Form("select detector,run_type from %s where run_number=%d",
2045 fConfig->GetRunTypelbTable(), GetCurrentRun());
2047 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
// fServer[3] is the slot Connect() fills with the Run/Shuttle logbook server.
2050 TSQLResult* aResult;
2051 aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
// Query failure report.
2053 Log("SHUTTLE", Form("GetRunType - Can't execute query <%s>", sqlQuery.Data()));
// Column order follows the select list: 0 = detector, 1 = run_type.
2058 while ((aRow = aResult->Next())) {
2059 TString det(aRow->GetField(0), aRow->GetFieldLength(0));
2060 TString runType(aRow->GetField(1), aRow->GetFieldLength(1));
// NOTE(review): fLogbookEntry is dereferenced here; no null check is visible.
2062 fLogbookEntry->SetRunType(det, runType);
2072 //______________________________________________________________________________________________
2073 const char* AliShuttle::GetRunType(const char* detCode)
2075 // returns run type read from "run type" logbook
2077 if(!fLogbookEntry) {
2078 AliError("No logbook entry!");
2082 return fLogbookEntry->GetRunType(detCode);
2085 //______________________________________________________________________________________________
2086 void AliShuttle::SetShuttleTempDir(const char* tmpDir)
2088 // sets Shuttle temp directory
2090 fgkShuttleTempDir = gSystem->ExpandPathName(tmpDir);
2093 //______________________________________________________________________________________________
2094 void AliShuttle::SetShuttleLogDir(const char* logDir)
2096 // sets Shuttle log directory
2098 fgkShuttleLogDir = gSystem->ExpandPathName(logDir);