1 /**************************************************************************
2 * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
4 * Author: The ALICE Off-line Project. *
5 * Contributors are mentioned in the code where appropriate. *
7 * Permission to use, copy, modify and distribute this software and its *
8 * documentation strictly for non-commercial purposes is hereby granted *
9 * without fee, provided that the above copyright notice appears in all *
10 * copies and that both the copyright notice and this permission notice *
11 * appear in the supporting documentation. The authors make no claims *
12 * about the suitability of this software for any purpose. It is *
13 * provided "as is" without express or implied warranty. *
14 **************************************************************************/
18 Revision 1.27 2007/01/30 17:52:42 jgrosseo
19 adding monalisa monitoring
21 Revision 1.26 2007/01/23 19:20:03 acolla
22 Removed old ldif files, added TOF, MCH ldif files. Added some options in
23 AliShuttleConfig::Print. Added in Ali Shuttle: SetShuttleTempDir and
26 Revision 1.25 2007/01/15 19:13:52 acolla
27 Moved some AliInfo to AliDebug in SendMail function
29 Revision 1.21 2006/12/07 08:51:26 jgrosseo
31 table, db names in ldap configuration
32 added GRP preprocessor
33 DCS data can also be retrieved by data point
35 Revision 1.20 2006/11/16 16:16:48 jgrosseo
36 introducing strict run ordering flag
37 removed giving preprocessor name to preprocessor, they have to know their name themselves ;-)
39 Revision 1.19 2006/11/06 14:23:04 jgrosseo
40 major update (Alberto)
41 o) reading of run parameters from the logbook
42 o) online offline naming conversion
43 o) standalone DCSclient package
45 Revision 1.18 2006/10/20 15:22:59 jgrosseo
46 o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
47 o) Merging Collect, CollectAll, CollectNew function
48 o) Removing implementation of empty copy constructors (declaration still there!)
50 Revision 1.17 2006/10/05 16:20:55 jgrosseo
51 adapting to new CDB classes
53 Revision 1.16 2006/10/05 15:46:26 jgrosseo
54 applying to the new interface
56 Revision 1.15 2006/10/02 16:38:39 jgrosseo
59 storing of objects that failed to be stored to the grid before
60 interfacing of shuttle status table in daq system
62 Revision 1.14 2006/08/29 09:16:05 jgrosseo
65 Revision 1.13 2006/08/15 10:50:00 jgrosseo
66 effc++ corrections (alberto)
68 Revision 1.12 2006/08/08 14:19:29 jgrosseo
69 Update to shuttle classes (Alberto)
71 - Possibility to set the full object's path in the Preprocessor's and
72 Shuttle's Store functions
73 - Possibility to extend the object's run validity in the same classes
74 ("startValidity" and "validityInfinite" parameters)
75 - Implementation of the StoreReferenceData function to store reference
76 data in a dedicated CDB storage.
78 Revision 1.11 2006/07/21 07:37:20 jgrosseo
79 last run is stored after each run
81 Revision 1.10 2006/07/20 09:54:40 jgrosseo
82 introducing status management: The processing per subdetector is divided into several steps,
83 after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
84 can keep track of the number of failures and skips further processing after a certain threshold is
85 exceeded. These thresholds can be configured in LDAP.
87 Revision 1.9 2006/07/19 10:09:55 jgrosseo
88 new configuration, accesst to DAQ FES (Alberto)
90 Revision 1.8 2006/07/11 12:44:36 jgrosseo
91 adding parameters for extended validity range of data produced by preprocessor
93 Revision 1.7 2006/07/10 14:37:09 jgrosseo
94 small fix + todo comment
96 Revision 1.6 2006/07/10 13:01:41 jgrosseo
97 enhanced storing of last sucessfully processed run (alberto)
99 Revision 1.5 2006/07/04 14:59:57 jgrosseo
100 revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
102 Revision 1.4 2006/06/12 09:11:16 jgrosseo
103 coding conventions (Alberto)
105 Revision 1.3 2006/06/06 14:26:40 jgrosseo
106 o) removed files that were moved to STEER
107 o) shuttle updated to follow the new interface (Alberto)
109 Revision 1.2 2006/03/07 07:52:34 hristov
110 New version (B.Yordanov)
112 Revision 1.6 2005/11/19 17:19:14 byordano
113 RetrieveDATEEntries and RetrieveConditionsData added
115 Revision 1.5 2005/11/19 11:09:27 byordano
116 AliShuttle declaration added
118 Revision 1.4 2005/11/17 17:47:34 byordano
119 TList changed to TObjArray
121 Revision 1.3 2005/11/17 14:43:23 byordano
124 Revision 1.1.1.1 2005/10/28 07:33:58 hristov
125 Initial import as subdirectory in AliRoot
127 Revision 1.2 2005/09/13 08:41:15 byordano
128 default startTime endTime added
130 Revision 1.4 2005/08/30 09:13:02 byordano
133 Revision 1.3 2005/08/29 21:15:47 byordano
139 // This class is the main manager for AliShuttle.
140 // It organizes the data retrieval from DCS and calls the
141 // interface methods of AliPreprocessor.
142 // For every detector in the AliShuttleConfig (see AliShuttleConfig),
143 // data for its set of aliases is retrieved. If there is a registered
144 // AliPreprocessor for this detector then it will be used
145 // according to the schema (see AliPreprocessor).
146 // If there isn't a registered AliPreprocessor then the retrieved
147 // data is stored automatically to the underlying AliCDBStorage.
148 // The alias name is used as detSpec.
151 #include "AliShuttle.h"
153 #include "AliCDBManager.h"
154 #include "AliCDBStorage.h"
155 #include "AliCDBId.h"
156 #include "AliCDBRunRange.h"
157 #include "AliCDBPath.h"
158 #include "AliCDBEntry.h"
159 #include "AliShuttleConfig.h"
160 #include "DCSClient/AliDCSClient.h"
162 #include "AliPreprocessor.h"
163 #include "AliShuttleStatus.h"
164 #include "AliShuttleLogbookEntry.h"
169 #include <TTimeStamp.h>
170 #include <TObjString.h>
171 #include <TSQLServer.h>
172 #include <TSQLResult.h>
176 #include <TMonaLisaWriter.h>
180 #include <sys/types.h>
181 #include <sys/wait.h>
// Class-wide switch, initialised to kTRUE.
// NOTE(review): presumably the flag that allows ProcessCurrentDetector to skip the
// DCS archive query for testing - confirm at the point of use.
185 Bool_t AliShuttle::fgkProcessDCS(kTRUE);
187 //______________________________________________________________________________________________
// Constructor. Records the DCS connection settings and puts the containers, the
// per-detector "first unprocessed" flags and the monitoring mutex into their initial state.
188 AliShuttle::AliShuttle(const AliShuttleConfig* config,
189 UInt_t timeout, Int_t retries):
191 fTimeout(timeout), fRetries(retries),
203 // config: AliShuttleConfig used
204 // timeout: timeout used for AliDCSClient connection
205 // retries: the number of retries in case of connection error.
// An invalid configuration is reported through AliFatal.
208 if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
209 for(int iSys=0;iSys<4;iSys++) {
// the FXS list is made owner of the objects stored in it
212 fFXSlist[iSys].SetOwner(kTRUE);
// ownership is switched on for the preprocessor map as well
214 fPreprocessorMap.SetOwner(kTRUE);
// initially no detector is flagged as being at its first unprocessed run
216 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
217 fFirstUnprocessed[iDet] = kFALSE;
// heap-allocated mutex, released in the destructor
219 fMonitoringMutex = new TMutex();
222 //______________________________________________________________________________________________
// Destructor. Deletes the registered preprocessors, closes and deletes the server
// connections and releases the monitoring mutex.
223 AliShuttle::~AliShuttle()
// TMap::DeleteAll removes the entries and deletes both keys and values
227 fPreprocessorMap.DeleteAll();
// one server connection per index 0..3
// NOTE(review): fServer[iSys] is dereferenced here; confirm that a null check guards these lines
228 for(int iSys=0;iSys<4;iSys++)
230 fServer[iSys]->Close();
231 delete fServer[iSys];
// reset the pointer after deleting so that it does not dangle
240 if (fMonitoringMutex)
242 delete fMonitoringMutex;
243 fMonitoringMutex = 0;
247 //______________________________________________________________________________________________
248 void AliShuttle::RegisterPreprocessor(AliPreprocessor* preprocessor)
251 // Registers a new AliPreprocessor.
252 // It uses GetName() as the identifier of the preprocessor.
253 // The preprocessor is registered if there isn't any other
254 // with the same identifier (GetName()).
//
// preprocessor: the preprocessor to add to fPreprocessorMap; its name must be a
//               detector name known to GetDetPos(), otherwise AliFatal is raised.
257 const char* detName = preprocessor->GetName();
// GetDetPos() returning a negative value means the name is not a known detector
258 if(GetDetPos(detName) < 0)
259 AliFatal(Form("********** !!!!! Invalid detector name: %s !!!!! **********", detName));
// a second registration under the same name only produces a warning
261 if (fPreprocessorMap.GetValue(detName)) {
262 AliWarning(Form("AliPreprocessor %s is already registered!", detName));
// the map key is a freshly allocated TObjString holding the detector name
266 fPreprocessorMap.Add(new TObjString(detName), preprocessor);
268 //______________________________________________________________________________________________
269 UInt_t AliShuttle::Store(const AliCDBPath& path, TObject* object,
270 AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite)
272 // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for
273 // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored
274 // using this function. Use StoreReferenceData instead!
275 // It calls the WriteToCDB function, which performs the actual storage.
//
// path, object, metaData, validityStart, validityInfinite are forwarded unchanged to
// WriteToCDB together with the main/local CDB URIs (fgkMainCDB, fgkLocalCDB).
// Returns the value returned by WriteToCDB.
277 return WriteToCDB(fgkMainCDB, fgkLocalCDB, path, object,
278 metaData, validityStart, validityInfinite);
282 //______________________________________________________________________________________________
283 UInt_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData)
285 // Stores a CDB object in the storage for reference data. These objects will not be available during
286 // offline reconstruction. Use this function for reference data only!
287 // It calls the WriteToCDB function, which performs the actual storage.
//
// No validity arguments are passed, so WriteToCDB is called with its default
// validityStart / validityInfinite values.
// Returns the value returned by WriteToCDB.
289 return WriteToCDB(fgkMainRefStorage, fgkLocalRefStorage, path, object, metaData);
293 //______________________________________________________________________________________________
294 UInt_t AliShuttle::WriteToCDB(const char* mainUri, const char* localUri,
295 const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
296 Int_t validityStart, Bool_t validityInfinite)
298 // Writes an object into the CDB. Parameters are passed by the Store and StoreReferenceData functions.
299 // The parameters are:
300 // 1) Uri of the main storage (Grid)
301 // 2) Uri of the backup storage (Local)
302 // 3) the object's path.
303 // 4) the object to be stored
304 // 5) the metaData to be associated with the object
305 // 6) the validity start run number w.r.t. the current run,
306 // if the data is valid only for this run leave the default 0
307 // 7) specifies if the calibration data is valid for infinity (this means until updated),
308 // typical for calibration runs, the default is kFALSE
// Return values include:
311 // 1 if stored in main (Grid) storage
312 // 2 if stored in backup (Local) storage
// label used in the log messages below: "CDB" for the main CDB, "Reference" otherwise
314 const char* cdbType = (mainUri == fgkMainCDB) ? "CDB" : "Reference";
// validityStart counts runs backwards from the current run
316 Int_t firstRun = GetCurrentRun() - validityStart;
// a negative first run is reported and, according to the message, reset to 0
318 AliError("First valid run happens to be less than 0! Setting it to 0.");
// the last valid run is either "infinity" or the current run
323 if(validityInfinite) {
324 lastRun = AliCDBRunRange::Infinity();
326 lastRun = GetCurrentRun();
// NOTE(review): -1, -1 are the version and subversion arguments - presumably "not yet
// assigned"; confirm against AliCDBId
329 AliCDBId id(path, firstRun, lastRun, -1, -1);
// tag the metadata with the current run unless a RunUsed(TObjString) property is already set
331 if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){
332 TObjString runUsed = Form("%d", GetCurrentRun());
333 metaData->SetProperty("RunUsed(TObjString)", runUsed.Clone());
// first attempt: the main storage
338 if (!(AliCDBManager::Instance()->GetStorage(mainUri))) {
339 AliError(Form("WriteToCDB - Cannot activate main %s storage", cdbType));
341 result = (UInt_t) AliCDBManager::Instance()->GetStorage(mainUri)
342 ->Put(object, id, metaData);
// fallback: the backup (local) storage
347 Log(fCurrentDetector,
348 Form("WriteToCDB - Problem with main %s storage. Putting <%s> into backup storage",
349 cdbType, path.GetPath().Data()));
351 // Set Grid version to current run number, to ease retrieval later
352 id.SetVersion(GetCurrentRun());
354 result = AliCDBManager::Instance()->GetStorage(localUri)
355 ->Put(object, id, metaData);
// logged when the object could not be stored
361 Log(fCurrentDetector, "WriteToCDB - Can't store data!");
369 //______________________________________________________________________________________________
370 AliShuttleStatus* AliShuttle::ReadShuttleStatus()
372 // Reads the AliShuttleStatus from the CDB
//
// The entry is looked up in the local CDB under /SHUTTLE/STATUS/<current detector> for
// the current run and kept in fStatusEntry.
// Returns 0 when no such entry exists.
379 fStatusEntry = AliCDBManager::Instance()->GetStorage(GetLocalCDB())
380 ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
382 if (!fStatusEntry) return 0;
// the entry is made owner of the object it holds
383 fStatusEntry->SetOwner(1);
// the stored object is expected to be an AliShuttleStatus
385 AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
// NOTE(review): presumably reported when the cast above yields a null pointer - confirm
387 AliError("Invalid object stored to CDB!");
394 //______________________________________________________________________________________________
395 Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status)
397 // writes the status for one subdetector
//
// status: the status object to store; it is wrapped in a new AliCDBEntry that is kept
//         in fStatusEntry and written to the local CDB (fgkLocalCDB).
404 Int_t run = GetCurrentRun();
// the entry is valid for the current run only (first run == last run)
406 AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
// the new entry is made owner of the status object
408 fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
409 fStatusEntry->SetOwner(1);
411 UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
// NOTE(review): presumably reported when Put() returned a failure value - confirm
414 AliError(Form("WriteShuttleStatus for %s, run %d failed", fCurrentDetector.Data(), run));
423 //______________________________________________________________________________________________
424 void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount)
426 // changes the AliShuttleStatus for the given detector and run to the given status
//
// newStatus:     status to set on the object held by fStatusEntry
// increaseCount: if true, the status counter is increased as well
// The modified entry is written back to the local CDB (fgkLocalCDB).
// NOTE(review): the two "UNEXPECTED" errors below are presumably guarded by null checks on
// fStatusEntry and on the cast result - confirm
429 AliError("UNEXPECTED: fStatusEntry empty");
433 AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
436 AliError("UNEXPECTED: status could not be read from current CDB entry");
// the transition text is both logged and remembered as the last action
440 TString actionStr = Form("UpdateShuttleStatus - %s: Changing state from %s to %s",
441 fCurrentDetector.Data(),
442 status->GetStatusName(),
443 status->GetStatusName(newStatus));
444 Log("SHUTTLE", actionStr);
445 SetLastAction(actionStr);
447 status->SetStatus(newStatus);
448 if (increaseCount) status->IncreaseCount();
// persist the modified status
450 AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
455 //______________________________________________________________________________________________
456 void AliShuttle::SendMLInfo()
459 // sends ML information about the current status of the current detector being processed
//
// Two parameters are sent through fMonaLisa: "<detector>_status" (status name) and
// "<detector>_count" (status counter), both read from the object held by fStatusEntry.
462 AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
// NOTE(review): presumably reported when the cast above yields a null pointer - confirm
465 AliError("UNEXPECTED: status could not be read from current CDB entry");
469 TMonaLisaText mlStatus(Form("%s_status", fCurrentDetector.Data()), status->GetStatusName());
470 TMonaLisaValue mlRetryCount(Form("%s_count", fCurrentDetector.Data()), status->GetCount());
// the list holds pointers to the two local objects above
473 mlList.Add(&mlStatus);
474 mlList.Add(&mlRetryCount);
476 fMonaLisa->SendParameters(&mlList);
479 //______________________________________________________________________________________________
480 Bool_t AliShuttle::ContinueProcessing()
482 // this function reads the AliShuttleStatus information from CDB and
483 // checks if the processing should be continued
484 // if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
//
// The checks are made for fCurrentDetector, in this order:
//  - the detector must be handled by this host (fConfig->HostProcessDetector)
//  - a preprocessor must be registered for it
//  - the Shuttle logbook must list it as kUnprocessed
//  - with strict run ordering, the run must be the first unprocessed one
//  - the stored AliShuttleStatus decides between first processing, a logbook
//    update, a storage retry, a restart or giving up after too many retries
486 if (!fConfig->HostProcessDetector(fCurrentDetector)) return kFALSE;
488 AliPreprocessor* aPreprocessor =
489 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
492 AliInfo(Form("%s: no preprocessor registered", fCurrentDetector.Data()));
496 AliShuttleLogbookEntry::Status entryStatus =
497 fLogbookEntry->GetDetectorStatus(fCurrentDetector);
499 if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
500 AliInfo(Form("ContinueProcessing - %s is %s",
501 fCurrentDetector.Data(),
502 fLogbookEntry->GetDetectorStatusName(entryStatus)));
506 // if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state
508 // check if current run is first unprocessed run for current detector
509 if (fConfig->StrictRunOrder(fCurrentDetector) &&
510 !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
// the detector name is passed explicitly: the format string contains a %s conversion
512 Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering but this is not the first unprocessed run!", fCurrentDetector.Data()));
516 AliShuttleStatus* status = ReadShuttleStatus();
// no stored status yet: create one in state kStarted and store it
519 Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
520 fCurrentDetector.Data()));
521 status = new AliShuttleStatus(AliShuttleStatus::kStarted);
522 return WriteShuttleStatus(status);
525 // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
526 // If it happens it may mean Logbook updating failed... let's do it now!
527 if (status->GetStatus() == AliShuttleStatus::kDone ||
528 status->GetStatus() == AliShuttleStatus::kFailed){
529 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
530 fCurrentDetector.Data(),
531 status->GetStatusName(status->GetStatus())));
532 UpdateShuttleLogbook(fCurrentDetector.Data(),
533 status->GetStatusName(status->GetStatus()));
// a previous Grid storage failure: retry the storage before anything else
537 if (status->GetStatus() == AliShuttleStatus::kStoreFailed) {
539 Form("ContinueProcessing - %s: Grid storage of one or more objects failed. Trying again now",
540 fCurrentDetector.Data()));
541 if(TryToStoreAgain()){
542 Log(fCurrentDetector.Data(), "ContinueProcessing - All objects successfully stored into OCDB");
543 UpdateShuttleStatus(AliShuttleStatus::kDone);
544 UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE");
547 Form("ContinueProcessing - %s: Grid storage failed again",
548 fCurrentDetector.Data()));
549 // trigger ML information manually because we did not have a status change
555 // if we get here, there is a restart
556 Bool_t cont = kFALSE;
// give up once the configured maximum number of retries has been reached
559 if (status->GetCount() >= fConfig->GetMaxRetries()) {
560 Log("SHUTTLE", Form("ContinueProcessing - %s failed %d times in status %s - "
561 "Updating Shuttle Logbook", fCurrentDetector.Data(),
562 status->GetCount(), status->GetStatusName()));
563 UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
564 UpdateShuttleStatus(AliShuttleStatus::kFailed);
// otherwise restart: back to kStarted with the retry counter increased
566 Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. "
567 "Aborted before with %s. Retry number %d.", fCurrentDetector.Data(),
568 status->GetStatusName(), status->GetCount()));
569 UpdateShuttleStatus(AliShuttleStatus::kStarted, kTRUE);
573 // Send mail to detector expert!
574 AliInfo(Form("Sending mail to %s expert...", fCurrentDetector.Data()));
576 Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert",
577 fCurrentDetector.Data()));
582 //______________________________________________________________________________________________
583 Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
586 // Makes data retrieval for all detectors in the configuration.
587 // entry: Shuttle logbook entry, contains run parameters and status of detectors
588 // (Unprocessed, Inactive, Failed or Done).
589 // Returns kFALSE if an error occurred and kTRUE otherwise
//
// Each detector that passes ContinueProcessing() is handled in a forked "client" process;
// the parent polls it with waitpid() and compares the elapsed time with the configured
// preprocessor time-out (fConfig->GetPPTimeOut()).
592 if(!entry) return kFALSE;
594 fLogbookEntry = entry;
// nothing to process if the logbook entry is already complete
596 if (fLogbookEntry->IsDone())
598 Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook");
599 UpdateShuttleLogbook("shuttle_done");
604 // create ML instance that monitors this run
605 fMonaLisa = new TMonaLisaWriter(Form("%d", GetCurrentRun()), "SHUTTLE", "aliendb1.cern.ch");
606 // disable monitoring of other parameters that come e.g. from TFile
607 gMonitoringWriter = 0;
609 AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^* \n",
613 // Send the information to ML
614 TMonaLisaText mlStatus("SHUTTLE_status", "Processing");
617 mlList.Add(&mlStatus);
619 fMonaLisa->SendParameters(&mlList);
621 fLogbookEntry->Print("all");
624 Bool_t hasError = kFALSE;
// query the main CDB and reference storages for the current run; the resulting list is
// read back via GetQueryCDBList() in TryToStoreAgain
626 AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
627 if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
628 AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
629 if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
631 // Loop on detectors in the configuration
632 TIter iter(fConfig->GetDetectors());
633 TObjString* aDetector = 0;
635 while ((aDetector = (TObjString*) iter.Next()))
637 fCurrentDetector = aDetector->String();
// skip detectors that are not to be processed in this pass
639 if (ContinueProcessing() == kFALSE) continue;
641 AliInfo(Form("\n\n \t\t\t****** run %d - %s: START ******",
642 GetCurrentRun(), aDetector->GetName()));
// reset the per-system "FXS called" flags for this detector
644 for(Int_t iSys=0;iSys<3;iSys++) fFXSCalled[iSys]=kFALSE;
646 Log(fCurrentDetector.Data(), "Starting processing");
652 Log("SHUTTLE", "ERROR: Forking failed");
// ---- parent side: monitor the client process ----
657 AliInfo(Form("In parent process of %d - %s: Starting monitoring",
658 GetCurrentRun(), aDetector->GetName()));
660 Long_t begin = time(0);
662 int status; // to be used with waitpid, on purpose an int (not Int_t)!
// WNOHANG makes waitpid return 0 immediately while the child is still running
663 while (waitpid(pid, &status, WNOHANG) == 0)
665 Long_t expiredTime = time(0) - begin;
667 if (expiredTime > fConfig->GetPPTimeOut())
// NOTE(review): expiredTime is a Long_t but the messages below use %d - check the argument
669 Log("SHUTTLE", Form("Process time out. Run time: %d seconds. Killing...",
676 gSystem->Sleep(1000);
// progress message when the elapsed time is a multiple of 60 seconds
680 if (expiredTime % 60 == 0)
681 Log("SHUTTLE", Form("Checked process. Run time: %d seconds.",
683 gSystem->Sleep(1000);
687 AliInfo(Form("In parent process of %d - %s: Client has terminated.",
688 GetCurrentRun(), aDetector->GetName()));
// the exit code is only meaningful if the client exited normally
690 if (WIFEXITED(status))
692 Int_t returnCode = WEXITSTATUS(status);
694 Log("SHUTTLE", Form("The return code is %d", returnCode));
// ---- client side: process the detector and exit ----
703 AliInfo(Form("In client process of %d - %s", GetCurrentRun(), aDetector->GetName()));
705 UInt_t result = ProcessCurrentDetector();
707 Int_t returnCode = 0; // will be set to 1 in case of an error
712 AliInfo(Form("\n \t\t\t****** run %d - %s: PREPROCESSOR ERROR ****** \n\n",
713 GetCurrentRun(), aDetector->GetName()));
715 else if (result == 2)
717 AliInfo(Form("\n \t\t\t****** run %d - %s: STORAGE ERROR ****** \n\n",
718 GetCurrentRun(), aDetector->GetName()));
721 AliInfo(Form("\n \t\t\t****** run %d - %s: DONE ****** \n\n",
722 GetCurrentRun(), aDetector->GetName()));
727 // Process successful: Update time_processed field in FXS logbooks!
728 if (UpdateTable() == kFALSE) returnCode = 1;
// clear the FXS lists of the systems that were used
731 for (UInt_t iSys=0; iSys<3; iSys++)
733 if (fFXSCalled[iSys]) fFXSlist[iSys].Clear();
736 AliInfo(Form("Client process of %d - %s is exiting now with %d.",
737 GetCurrentRun(), aDetector->GetName(), returnCode));
739 // the client exits here
740 gSystem->Exit(returnCode);
742 AliError("We should never get here!!!");
746 AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^* \n",
749 //check if shuttle is done for this run, if so update logbook
750 TObjArray checkEntryArray;
751 checkEntryArray.SetOwner(1);
752 TString whereClause = Form("where run=%d", GetCurrentRun());
753 if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || checkEntryArray.GetEntries() == 0) {
754 Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!",
756 return hasError == kFALSE;
// the query is restricted to one run, so the first entry is the one of interest
759 AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*>
760 (checkEntryArray.At(0));
764 if (checkEntry->IsDone())
766 Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook");
767 UpdateShuttleLogbook("shuttle_done");
// detectors still unprocessed for this run lose their "first unprocessed" flag
771 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
773 if (checkEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
775 AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
776 checkEntry->GetRun(), GetDetName(iDet)));
777 fFirstUnprocessed[iDet] = kFALSE;
783 // remove ML instance
789 return hasError == kFALSE;
792 //______________________________________________________________________________________________
793 UInt_t AliShuttle::ProcessCurrentDetector()
796 // Makes data retrieval just for a specific detector (fCurrentDetector).
797 // Threre should be a configuration for this detector.
799 AliInfo(Form("Retrieving values for %s, run %d", fCurrentDetector.Data(), GetCurrentRun()));
801 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
806 Bool_t aDCSError = kFALSE;
809 // TODO Test only... I've added a flag that allows to
810 // exclude DCS archive DB query
813 AliInfo("Skipping DCS processing!");
816 TString host(fConfig->GetDCSHost(fCurrentDetector));
817 Int_t port = fConfig->GetDCSPort(fCurrentDetector);
819 // Retrieval of Aliases
820 TObjString* anAlias = 0;
822 Int_t nTotAliases= ((TMap*)fConfig->GetDCSAliases(fCurrentDetector))->GetEntries();
823 TIter iterAliases(fConfig->GetDCSAliases(fCurrentDetector));
824 while ((anAlias = (TObjString*) iterAliases.Next()))
826 TObjArray *valueSet = new TObjArray();
827 valueSet->SetOwner(1);
829 if (((iAlias-1) % 500) == 0 || iAlias == nTotAliases)
830 AliInfo(Form("Querying DCS archive: alias %s (%d of %d)",
831 anAlias->GetName(), iAlias++, nTotAliases));
832 aDCSError = (GetValueSet(host, port, anAlias->String(), valueSet, kAlias) == 0);
836 dcsMap.Add(anAlias->Clone(), valueSet);
838 Log(fCurrentDetector,
839 Form("ProcessCurrentDetector - Error while retrieving alias %s",
840 anAlias->GetName()));
841 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
847 // Retrieval of Data Points
850 Int_t nTotDPs= ((TMap*)fConfig->GetDCSDataPoints(fCurrentDetector))->GetEntries();
851 TIter iterDP(fConfig->GetDCSDataPoints(fCurrentDetector));
852 while ((aDP = (TObjString*) iterDP.Next()))
854 TObjArray *valueSet = new TObjArray();
855 valueSet->SetOwner(1);
856 if (((iDP-1) % 500) == 0 || iDP == nTotDPs)
857 AliInfo(Form("Querying DCS archive: DP %s (%d of %d)",
858 aDP->GetName(), iDP++, nTotDPs));
859 aDCSError = (GetValueSet(host, port, aDP->String(), valueSet, kDP) == 0);
863 dcsMap.Add(aDP->Clone(), valueSet);
865 Log(fCurrentDetector,
866 Form("ProcessCurrentDetector - Error while retrieving data point %s",
868 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
875 // DCS Archive DB processing successful. Call Preprocessor!
876 UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
878 AliPreprocessor* aPreprocessor =
879 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
881 aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
882 UInt_t aPPResult = aPreprocessor->Process(&dcsMap);
884 UInt_t returnValue = 0;
885 if (aPPResult == 0) { // Preprocessor error
886 UpdateShuttleStatus(AliShuttleStatus::kPPError);
888 } else if (fGridError == kFALSE) { // process and Grid storage ok!
889 UpdateShuttleStatus(AliShuttleStatus::kDone);
890 UpdateShuttleLogbook(fCurrentDetector, "DONE");
891 Log(fCurrentDetector.Data(),
892 "ProcessCurrentDetector - Preprocessor and Grid storage ended successfully");
894 } else { // Grid storage error (process ok, but object put in local storage)
895 UpdateShuttleStatus(AliShuttleStatus::kStoreFailed);
904 //______________________________________________________________________________________________
905 Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
908 // Queries DAQ's Shuttle logbook and fills the detector status objects.
909 // Calls QueryRunParameters to query the DAQ logbook for run parameters.
//
// whereClause: SQL fragment inserted verbatim between "select * from logbook_shuttle"
//              and "order by run"
// One AliShuttleLogbookEntry per returned row is appended to the entries array.
913 // check connection, in case connect
914 if(!Connect(3)) return kFALSE;
917 sqlQuery = Form("select * from logbook_shuttle %s order by run", whereClause);
919 TSQLResult* aResult = fServer[3]->Query(sqlQuery);
// NOTE(review): presumably reported when Query() returned a null result - confirm
921 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
925 AliDebug(2,Form("Query = %s", sqlQuery.Data()));
927 if(aResult->GetRowCount() == 0) {
928 // if(sqlQuery.EndsWith("where shuttle_done=0 order by run")){
929 // Log("SHUTTLE", "QueryShuttleLogbook - All runs in Shuttle Logbook are already DONE");
933 AliInfo("No entries in Shuttle Logbook match request");
939 // TODO Check field count!
// the result is rejected unless it has exactly nCols columns
940 const UInt_t nCols = 22;
941 if (aResult->GetFieldCount() != (Int_t) nCols) {
942 AliError("Invalid SQL result field number!");
948 while ((aRow = aResult->Next())) {
// column 0 holds the run number
949 TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
950 Int_t run = runString.Atoi();
// run parameters are fetched from the DAQ logbook
952 AliShuttleLogbookEntry *entry = QueryRunParameters(run);
// every column is handed to the entry as (field name, value)
957 for(UInt_t ii = 0; ii < nCols; ii++)
958 entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii));
960 entries.AddLast(entry);
964 // if(sqlQuery.EndsWith("where shuttle_done=0 order by run"))
965 // Log("SHUTTLE", Form("QueryShuttleLogbook - Found %d unprocessed runs in Shuttle Logbook",
966 // entries.GetEntriesFast()));
971 //______________________________________________________________________________________________
972 AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
975 // Retrieves the run parameters written in the DAQ logbook and sets them into an AliShuttleLogbookEntry object
//
// run: run number used in the "where run=" condition on the table named by
//      fConfig->GetDAQlbTable()
// A new AliShuttleLogbookEntry is created for the run and filled with one
// (field name, value) pair per column of the single matching row.
978 // check connection, in case connect
983 sqlQuery.Form("select * from %s where run=%d", fConfig->GetDAQlbTable(), run);
985 TSQLResult* aResult = fServer[3]->Query(sqlQuery);
// NOTE(review): presumably reported when Query() returned a null result - confirm
987 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
// exactly one row is expected for a run
991 if (aResult->GetRowCount() == 0) {
992 Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run));
997 if (aResult->GetRowCount() > 1) {
998 AliError(Form("More than one entry in DAQ Logbook for run %d. Skipping", run));
1003 TSQLRow* aRow = aResult->Next();
1006 AliError(Form("Could not retrieve row for run %d. Skipping", run));
1011 AliShuttleLogbookEntry* entry = new AliShuttleLogbookEntry(run);
1013 for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
1014 entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
1016 UInt_t startTime = entry->GetStartTime();
1017 UInt_t endTime = entry->GetEndTime();
// both times must be non-zero and the start must not be later than the end
1019 if (!startTime || !endTime || startTime > endTime) {
// NOTE(review): startTime/endTime are UInt_t but are formatted with %d below
1021 Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d",
1022 run, startTime, endTime));
1035 //______________________________________________________________________________________________
1036 Bool_t AliShuttle::TryToStoreAgain()
1038 // Called in case the detector failed to store the object in Grid OCDB
1039 // It tries to store the object again, if it does not find more recent and overlapping objects
1040 // Calls the underlying TryToStoreAgain(TString&) function twice, for OCDB and Reference storage.
//
// Both attempts are always made; the result is kTRUE only if both succeeded.
1042 AliInfo("Trying to store OCDB data again...");
1043 Bool_t resultCDB = TryToStoreAgain(fgkMainCDB);
1045 AliInfo("Trying to store reference data again...");
1046 Bool_t resultRef = TryToStoreAgain(fgkMainRefStorage);
1048 return resultCDB && resultRef;
1051 //______________________________________________________________________________________________
1052 Bool_t AliShuttle::TryToStoreAgain(TString& gridURI)
1054 // Called by TryToStoreAgain(), performs actual storage retry
//
// gridURI: URI of the Grid storage to retry; only fgkMainCDB and fgkMainRefStorage are
//          accepted, any other value is reported as invalid.
// Objects of the current detector found in the matching local backup storage for the
// current run are put into the Grid storage, unless the checks in the inner loop veto it.
1056 TObjArray* gridIds=0;
1058 Bool_t result = kTRUE;
1060 const char* type = 0;
// choose the backup storage that belongs to the requested Grid storage
1062 if(gridURI == fgkMainCDB) {
1064 backupURI = fgkLocalCDB;
1065 } else if(gridURI == fgkMainRefStorage) {
1067 backupURI = fgkLocalRefStorage;
1069 AliError(Form("Invalid storage URI: %s", gridURI.Data()));
1073 AliCDBManager* man = AliCDBManager::Instance();
1075 AliCDBStorage *gridSto = man->GetStorage(gridURI);
1077 Log(fCurrentDetector.Data(),
1078 Form("TryToStoreAgain - cannot activate main %s storage", type));
// ids known to the Grid storage from its last QueryCDB call (see Process)
1082 gridIds = gridSto->GetQueryCDBList();
1084 // get objects previously stored in local CDB
1085 AliCDBStorage *backupSto = man->GetStorage(backupURI);
1086 AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*");
1087 // Local objects were stored with current run as Grid version!
1088 TList* localEntries = backupSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun());
1089 localEntries->SetOwner(1);
1091 // loop on local stored objects
1092 TIter localIter(localEntries);
1093 AliCDBEntry *aLocEntry = 0;
1094 while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){
1095 aLocEntry->SetOwner(1);
// aLocId is a copy taken before the version fields are reset, so it still identifies the local file
1096 AliCDBId aLocId = aLocEntry->GetId();
1097 aLocEntry->SetVersion(-1);
1098 aLocEntry->SetSubVersion(-1);
1100 // loop on Grid valid Id's
1101 Bool_t store = kTRUE;
1102 TIter gridIter(gridIds);
1103 AliCDBId* aGridId = 0;
1104 while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
1105 // If local object is valid up to infinity we store it only if it is
1106 // the first unprocessed run!
1107 if (aLocId.GetLastRun() == AliCDBRunRange::Infinity())
1109 if (!fFirstUnprocessed[GetDetPos(fCurrentDetector)])
1111 Log(fCurrentDetector.Data(),
1112 ("TryToStoreAgain - This object has validity infinite but "
1113 "there are previous unprocessed runs!"));
// only Grid ids with the same path as the local object are relevant
1119 if(aGridId->GetPath() != aLocId.GetPath()) continue;
1120 // skip all objects valid up to infinity
1121 if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
1122 // if we get here, it means there's already some more recent object stored on Grid!
1128 Log(fCurrentDetector.Data(),
1129 Form("TryToStoreAgain - A more recent object already exists in %s storage: <%s>",
1130 type, aGridId->ToString().Data()));
1131 // removing local filename...
1132 // TODO maybe it's better not to remove it, it was not copied to the Grid!
// NOTE(review): the file is removed through a shell "rm" built from the file name
1134 backupSto->IdToFilename(aLocId, filename);
1135 AliInfo(Form("Removing local file %s", filename.Data()));
1136 gSystem->Exec(Form("rm %s",filename.Data()));
1140 // If we get here, the file can be stored!
1141 Bool_t storeOk = gridSto->Put(aLocEntry);
1143 Log(fCurrentDetector.Data(),
1144 Form("TryToStoreAgain - Object <%s> successfully put into %s storage",
1145 aLocId.ToString().Data(), type));
1147 // removing local filename...
1149 backupSto->IdToFilename(aLocId, filename);
1150 AliInfo(Form("Removing local file %s", filename.Data()));
1151 gSystem->Exec(Form("rm %s", filename.Data()));
1154 Log(fCurrentDetector.Data(),
1155 Form("TryToStoreAgain - Grid %s storage of object <%s> failed again",
1156 type, aLocId.ToString().Data()));
1160 localEntries->Clear();
1165 //______________________________________________________________________________________________
// Queries a DCS server for the values of one alias or data point, restricted to the
// time window of the current run, and stores them in valueSet.
1166 Bool_t AliShuttle::GetValueSet(const char* host, Int_t port, const char* entry,
1167 TObjArray* valueSet, DCSType type)
1169 // Retrieve all "entry" data points from the DCS server
1170 // host, port: TSocket connection parameters
1171 // entry: name of the alias or data point
1172 // valueSet: array of retrieved AliDCSValue's
1173 // type: kAlias or kDP
// A new client is created for every call, using the shuttle-wide timeout (fTimeout)
// and retry count (fRetries).
1175 AliDCSClient client(host, port, fTimeout, fRetries);
1176 if (!client.IsConnected())
// Alias query over [GetCurrentStartTime(), GetCurrentEndTime()]; the client fills valueSet.
1185 result = client.GetAliasValues(entry,
1186 GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
// Data point query over the same time window.
1190 result = client.GetDPValues(entry,
1191 GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
// The failure reason is written to the log of the detector currently being processed.
// NOTE(review): the condition on result guarding this branch is not visible here — confirm.
1196 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get '%s'! Reason: %s",
1197 entry, AliDCSClient::GetErrorString(result)));
// For a server-side error the message provided by the server is logged as well.
1199 if (result == AliDCSClient::fgkServerError)
1201 Log(fCurrentDetector.Data(), Form("GetValueSet - Server error: %s",
1202 client.GetServerError().Data()));
1211 //______________________________________________________________________________________________
// Looks up one file in the FXS database of the given system, copies it into the Shuttle
// temp directory and returns the path of the local copy.
// Returns 0 when the copy did not succeed (see the "if(!result) return 0;" check below).
// The returned pointer refers to a function-static TString, so it is overwritten by the
// next call to GetFile.
1212 const char* AliShuttle::GetFile(Int_t system, const char* detector,
1213 const char* id, const char* source)
1215 // Get calibration file from file exchange servers
1216 // First queries the FXS database for the file name, using the run, detector, id and source info
1217 // then calls RetrieveFile(filename) for actual copy to local disk
1218 // run: current run being processed (given by Logbook entry fLogbookEntry)
1219 // detector: the Preprocessor name
1220 // id: provided as a parameter by the Preprocessor
1221 // source: provided by the Preprocessor through GetFileSources function
1223 // check connection, in case connect
1224 if (!Connect(system))
1226 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
1230 // Query preparation
1231 TString sqlQueryStart;
1232 TString whereClause;
1233 TString sourceName(source);
// The query text depends on the system. This variant filters on the DAQsource column.
1237 sqlQueryStart = Form("select filePath,size from %s where", fConfig->GetFXSdbTable(system));
1238 whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\"",
1239 GetCurrentRun(), detector, id, source);
// DCS: the source argument is not part of the where clause.
1243 else if (system == kDCS)
1245 sqlQueryStart = Form("select filePath,size from %s where", fConfig->GetFXSdbTable(system));
1246 whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1247 GetCurrentRun(), detector, id);
// HLT: a third column (fileChecksum) is selected and the source is matched against DDLnumbers.
1251 else if (system == kHLT)
1253 sqlQueryStart = Form("select filePath,fileSize,fileChecksum from %s where",
1254 fConfig->GetFXSdbTable(system));
1255 whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\"",
1256 GetCurrentRun(), detector, id, source);
1260 TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1262 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
// NOTE(review): the log messages from here on carry the prefix "GetFileName" although the
// function is called GetFile.
1265 TSQLResult* aResult = 0;
1266 aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
1268 Log(detector, Form("GetFileName - Can't execute SQL query to %s database for: id = %s, source = %s",
1269 GetSystemName(system), id, sourceName.Data()));
// The checks below each log a message unless the result has exactly one row with nFields fields
// (the assignment of nFields is not visible here).
1273 if(aResult->GetRowCount() == 0)
1276 Form("GetFileName - No entry in %s FXS db for: id = %s, source = %s",
1277 GetSystemName(system), id, sourceName.Data()));
1282 if (aResult->GetRowCount() > 1) {
1284 Form("GetFileName - More than one entry in %s FXS db for: id = %s, source = %s",
1285 GetSystemName(system), id, sourceName.Data()));
1290 if (aResult->GetFieldCount() != nFields) {
1292 Form("GetFileName - Wrong field count in %s FXS db for: id = %s, source = %s",
1293 GetSystemName(system), id, sourceName.Data()));
1298 TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
1301 Log(detector, Form("GetFileName - Empty set result in %s FXS db from query: id = %s, source = %s",
1302 GetSystemName(system), id, sourceName.Data()));
// Field 0 is the remote path, field 1 the size; both are copied with their explicit length.
1307 TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
1308 TString fileSize(aRow->GetField(1), aRow->GetFieldLength(1));
// Only the HLT query selects a checksum (third field).
1310 if(system == kHLT) fileMd5Sum = aRow->GetField(2);
1315 AliDebug(2, Form("filePath = %s",filePath.Data()));
1317 // retrieved file is renamed to make it unique
1318 TString localFileName = Form("%s_%s_%d_%s_%s.shuttle",
1319 GetSystemName(system), detector, GetCurrentRun(), id, sourceName.Data());
1322 // file retrieval from FXS
1323 UInt_t nRetries = 0;
1324 UInt_t maxRetries = 3;
1325 Bool_t result = kFALSE;
1327 // copy!! if successful TSystem::Exec returns 0
// At most maxRetries copy attempts are made.
1328 while(nRetries++ < maxRetries) {
1329 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
1330 result = RetrieveFile(system, filePath.Data(), localFileName.Data());
1333 Log(detector, Form("GetFileName - Copy of file %s from %s FXS failed",
1334 filePath.Data(), GetSystemName(system)));
1337 AliInfo(Form("File %s copied from %s FXS into %s/%s",
1338 filePath.Data(), GetSystemName(system),
1339 GetShuttleTempDir(), localFileName.Data()));
1344 // compare md5sum of local file with the one stored in the FXS DB
// The exit status of the pipeline (i.e. of grep) tells whether the checksum was found.
// NOTE(review): with "2>&1 > /dev/null" only stdout is discarded; stderr is redirected to the
// previous stdout. If both streams were meant to be silenced the order would be
// "> /dev/null 2>&1" — confirm the intent.
1345 Int_t md5Comp = gSystem->Exec(Form("md5sum %s/%s |grep %s 2>&1 > /dev/null",
1346 GetShuttleTempDir(), localFileName.Data(), fileMd5Sum.Data()));
1350 Log(detector, Form("GetFileName - md5sum of file %s does not match with local copy!",
1359 if(!result) return 0;
// Remember that this system was used and which (id, source) pair was retrieved; the pair is
// stored as "id#!?!#source" and split again in UpdateTable.
1361 fFXSCalled[system]=kTRUE;
1362 TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, sourceName.Data()));
1363 fFXSlist[system].Add(fileParams);
// static: keeps the returned character buffer alive after the function returns.
1365 static TString fullLocalFileName;
1366 fullLocalFileName = TString::Format("%s/%s", GetShuttleTempDir(), localFileName.Data());
1368 AliInfo(Form("fullLocalFileName = %s", fullLocalFileName.Data()));
1370 return fullLocalFileName.Data();
1374 //______________________________________________________________________________________________
1375 Bool_t AliShuttle::RetrieveFile(UInt_t system, const char* fxsFileName, const char* localFileName)
1377 // Copies file from FXS to local Shuttle machine
1379 // check temp directory: trying to cd to temp; if it does not exist, create it
1380 AliDebug(2, Form("Copy file %s from %s FXS into %s/%s",
1381 GetSystemName(system), fxsFileName, GetShuttleTempDir(), localFileName));
1383 void* dir = gSystem->OpenDirectory(GetShuttleTempDir());
1385 if (gSystem->mkdir(GetShuttleTempDir(), kTRUE)) {
1386 AliError(Form("Can't open directory <%s>", GetShuttleTempDir()));
1391 gSystem->FreeDirectory(dir);
1394 TString baseFXSFolder;
1397 baseFXSFolder = "FES/";
1399 else if (system == kDCS)
1403 else if (system == kHLT)
1405 baseFXSFolder = "~/";
1409 TString command = Form("scp -oPort=%d -2 %s@%s:%s%s %s/%s",
1410 fConfig->GetFXSPort(system),
1411 fConfig->GetFXSUser(system),
1412 fConfig->GetFXSHost(system),
1413 baseFXSFolder.Data(),
1415 GetShuttleTempDir(),
1418 AliDebug(2, Form("%s",command.Data()));
1420 Bool_t result = (gSystem->Exec(command.Data()) == 0);
1425 //______________________________________________________________________________________________
1426 TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
1428 // Get sources producing the condition file Id from file exchange servers
1432 AliError("DCS system has only one source of data!");
1437 // check connection, in case connect
1438 if (!Connect(system))
1440 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
1444 TString sourceName = 0;
1447 sourceName = "DAQsource";
1448 } else if (system == kHLT)
1450 sourceName = "DDLnumbers";
1453 TString sqlQueryStart = Form("select %s from %s where", sourceName.Data(), fConfig->GetFXSdbTable(kDAQ));
1454 TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1455 GetCurrentRun(), detector, id);
1456 TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1458 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1461 TSQLResult* aResult;
1462 aResult = fServer[system]->Query(sqlQuery);
1464 Log(detector, Form("GetFileSources - Can't execute SQL query to %s database for id: %s",
1465 GetSystemName(system), id));
1469 if (aResult->GetRowCount() == 0)
1472 Form("GetFileSources - No entry in %s FXS table for id: %s", GetSystemName(system), id));
1478 TList *list = new TList();
1481 while ((aRow = aResult->Next()))
1484 TString source(aRow->GetField(0), aRow->GetFieldLength(0));
1485 AliDebug(2, Form("%s = %s", sourceName.Data(), source.Data()));
1486 list->Add(new TObjString(source));
1495 //______________________________________________________________________________________________
// Makes sure fServer[system] holds an open MySQL connection, opening one if necessary.
// Indices below 3 select an FXS database; any other index selects the DAQ logbook
// settings (fServer[3] is the slot used for it below).
1496 Bool_t AliShuttle::Connect(Int_t system)
1498 // Connect to MySQL Server of the system's FXS MySQL databases
1499 // DAQ Logbook, Shuttle Logbook and DAQ FXS db are on the same host
1501 // check connection: if already connected return
1502 if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
1504 TString dbHost, dbUser, dbPass, dbName;
1506 if (system < 3) // FXS db servers
1508 dbHost = Form("mysql://%s:%d", fConfig->GetFXSdbHost(system), fConfig->GetFXSdbPort(system));
1509 dbUser = fConfig->GetFXSdbUser(system);
1510 dbPass = fConfig->GetFXSdbPass(system);
1511 dbName = fConfig->GetFXSdbName(system);
1512 } else { // Run & Shuttle logbook servers
1513 // TODO Will the Shuttle logbook server be the same as the Run logbook server ???
1514 dbHost = Form("mysql://%s:%d", fConfig->GetDAQlbHost(), fConfig->GetDAQlbPort());
1515 dbUser = fConfig->GetDAQlbUser();
1516 dbPass = fConfig->GetDAQlbPass();
1517 dbName = fConfig->GetDAQlbDB();
// NOTE(review): a non-null but disconnected fServer[system] passes the early-return check above
// and is overwritten here without being deleted — looks like a leak, confirm.
1520 fServer[system] = TSQLServer::Connect(dbHost.Data(), dbUser.Data(), dbPass.Data());
1521 if (!fServer[system] || !fServer[system]->IsConnected()) {
1524 AliError(Form("Can't establish connection to FXS database for %s",
1525 AliShuttleInterface::GetSystemName(system)));
1527 AliError("Can't establish connection to Run logbook.");
// a server object that exists but is not connected is released
1529 if(fServer[system]) delete fServer[system];
// The tables of database dbName are requested on the server of the selected system
// (the statements choosing between the four calls are not visible here).
1534 TSQLResult* aResult=0;
1537 aResult = fServer[kDAQ]->GetTables(dbName.Data());
1540 aResult = fServer[kDCS]->GetTables(dbName.Data());
1543 aResult = fServer[kHLT]->GetTables(dbName.Data());
1546 aResult = fServer[3]->GetTables(dbName.Data());
1554 //______________________________________________________________________________________________
// Writes the current time into the time_processed column of the FXS table rows that
// correspond to the files recorded in fFXSlist for the current run and detector.
1555 Bool_t AliShuttle::UpdateTable()
1557 // Update FXS table filling time_processed field in all rows corresponding to current run and detector
1559 Bool_t result = kTRUE;
// Only the three FXS systems (indices 0..2) are visited, and only those flagged in fFXSCalled.
1561 for (UInt_t system=0; system<3; system++)
1563 if(!fFXSCalled[system]) continue;
1565 // check connection, in case connect
1566 if (!Connect(system))
1568 Log(fCurrentDetector, Form("UpdateTable - Couldn't connect to %s FXS database", GetSystemName(system)));
1573 TTimeStamp now; // value written to time_processed
1575 // Loop on FXS list entries
1576 TIter iter(&fFXSlist[system]);
1577 TObjString *aFXSentry=0;
1578 while ((aFXSentry = dynamic_cast<TObjString*> (iter.Next())))
// Each entry is expected to split into exactly two tokens: file id and source.
// NOTE(review): an entry with an empty source part would presumably yield a single token and
// take the error path below — confirm for systems that do not use a source.
1580 TString aFXSentrystr = aFXSentry->String();
1581 TObjArray *aFXSarray = aFXSentrystr.Tokenize("#!?!#");
1582 if (!aFXSarray || aFXSarray->GetEntries() != 2 )
1584 Log(fCurrentDetector, Form("UpdateTable - error updating %s FXS entry. Check string: <%s>",
1585 GetSystemName(system), aFXSentrystr.Data()));
1586 if(aFXSarray) delete aFXSarray;
// fileId and source point into the token objects held by aFXSarray.
1590 const char* fileId = ((TObjString*) aFXSarray->At(0))->GetName();
1591 const char* source = ((TObjString*) aFXSarray->At(1))->GetName();
// The where clause depends on the system: this variant matches the source against DAQsource.
1593 TString whereClause;
1596 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
1597 GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
// DCS: the source is not part of the where clause.
1599 else if (system == kDCS)
1601 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\";",
1602 GetCurrentRun(), fCurrentDetector.Data(), fileId);
// HLT: the source is matched against DDLnumbers.
1604 else if (system == kHLT)
1606 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\";",
1607 GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
// NOTE(review): now.GetSec() is formatted with %d; if it returns a type wider than int the
// conversion does not match — confirm.
1612 TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
1613 now.GetSec(), whereClause.Data());
1615 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1618 TSQLResult* aResult;
1619 aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
1622 Log(fCurrentDetector, Form("UpdateTable - %s db: can't execute SQL query <%s>",
1623 GetSystemName(system), sqlQuery.Data()));
1634 //______________________________________________________________________________________________
// Updates the logbook_shuttle row of the current run: either sets shuttle_done=1 or writes
// a status string into the column named after the detector.
1635 Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
1638 // Update Shuttle logbook filling detector or shuttle_done column
1639 // ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
1642 // check connection, in case connect
1644 Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook.");
1648 TString detName(detector);
// The special name "shuttle_done" marks the whole run as done; status is not used in that case.
1650 if(detName == "shuttle_done")
1652 setClause = "set shuttle_done=1";
1654 // Send the information to ML
1655 TMonaLisaText mlStatus("SHUTTLE_status", "Done");
// mlList receives the address of the local mlStatus object.
1658 mlList.Add(&mlStatus);
1660 fMonaLisa->SendParameters(&mlList);
// For a detector column the status must contain "done" or "failed" (case-insensitive).
1662 TString statusStr(status);
1663 if(statusStr.Contains("done", TString::kIgnoreCase) ||
1664 statusStr.Contains("failed", TString::kIgnoreCase)){
// The detector name is used as the column name and the status as its new value.
1665 setClause = Form("set %s=\"%s\"", detector, status);
1668 Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s",
// Only the row of the current run is updated.
1674 TString whereClause = Form("where run=%d", GetCurrentRun());
1676 TString sqlQuery = Form("update logbook_shuttle %s %s",
1677 setClause.Data(), whereClause.Data());
1679 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
// The update is sent through fServer[3].
1682 TSQLResult* aResult;
1683 aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
1685 Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data()));
1693 //______________________________________________________________________________________________
1694 Int_t AliShuttle::GetCurrentRun() const
1696 // Get current run from logbook entry
1698 return fLogbookEntry ? fLogbookEntry->GetRun() : -1;
1701 //______________________________________________________________________________________________
1702 UInt_t AliShuttle::GetCurrentStartTime() const
1704 // get current start time
1706 return fLogbookEntry ? fLogbookEntry->GetStartTime() : 0;
1709 //______________________________________________________________________________________________
1710 UInt_t AliShuttle::GetCurrentEndTime() const
1712 // get current end time from logbook entry
1714 return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
1717 //______________________________________________________________________________________________
// Writes one message to the AliRoot info stream and appends it to a log file in the
// Shuttle log directory.
// detector: name used in the message prefix and in the log file name
// message:  text to be logged
1718 void AliShuttle::Log(const char* detector, const char* message)
1720 // Fill log string with a message
// The log directory is opened to see whether it exists; mkdir is called with kTRUE
// (recursive) to create it.
1722 void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
1724 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE)) {
1725 AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
1730 gSystem->FreeDirectory(dir);
// Message layout: "<time stamp> (<pid>): <detector> - [run <n> - ]<message>";
// the run part is added only when a current run is known (GetCurrentRun() >= 0).
1733 TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
1734 if (GetCurrentRun() >= 0)
1735 toLog += Form("run %d - ", GetCurrentRun());
1736 toLog += Form("%s", message);
1738 AliInfo(toLog.Data());
// File name: <logdir>/<detector>_<run>.log when a run is known, <logdir>/<detector>.log otherwise.
1741 if (GetCurrentRun() >= 0)
1742 fileName.Form("%s/%s_%d.log", GetShuttleLogDir(), detector, GetCurrentRun());
1744 fileName.Form("%s/%s.log", GetShuttleLogDir(), detector);
1746 gSystem->ExpandPathName(fileName);
// The file is opened in append mode, so earlier messages are kept.
1749 logFile.open(fileName, ofstream::out | ofstream::app);
1751 if (!logFile.is_open()) {
1752 AliError(Form("Could not open file %s", fileName.Data()));
1756 logFile << toLog.Data() << "\n";
1761 //______________________________________________________________________________________________
// Entry point of a Shuttle pass: selects the runs to process from the Shuttle logbook,
// initialises the fFirstUnprocessed flags and hands the runs to RetrieveConditionsData.
1762 Bool_t AliShuttle::Collect(Int_t run)
1765 // Collects conditions data for all UNPROCESSED runs written to DAQ LogBook in case of run = -1 (default)
1766 // If a dedicated run is given this run is processed
1768 // In operational mode, this is the Shuttle function triggered by the EOR signal.
1772 Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs");
1774 Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run));
1776 SetLastAction("Starting");
// Base selection: runs not yet flagged shuttle_done; a run number can be added to narrow it.
1778 TString whereClause("where shuttle_done=0");
1780 whereClause += Form(" and run=%d", run);
1782 TObjArray shuttleLogbookEntries;
1783 if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries))
1785 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
// Nothing selected: the message depends on whether a specific run was requested.
1789 if (shuttleLogbookEntries.GetEntries() == 0)
1792 Log("SHUTTLE","Collect - Found no UNPROCESSED runs in Shuttle logbook");
1794 Log("SHUTTLE", Form("Collect - Run %d is already DONE "
1795 "or it does not exist in Shuttle logbook", run));
// Start by assuming, for every detector, that this is its first unprocessed run.
1799 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1800 fFirstUnprocessed[iDet] = kTRUE;
1804 // query Shuttle logbook for earlier runs, check if some detectors are unprocessed,
1805 // flag them into fFirstUnprocessed array
1806 TString whereClause(Form("where shuttle_done=0 and run < %d", run));
1807 TObjArray tmpLogbookEntries;
1808 if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries))
1810 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
1814 TIter iter(&tmpLogbookEntries);
1815 AliShuttleLogbookEntry* anEntry = 0;
1816 while ((anEntry = dynamic_cast<AliShuttleLogbookEntry*> (iter.Next())))
1818 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
// A detector still unprocessed in an earlier run loses its "first unprocessed" flag.
1820 if (anEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
1822 AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
1823 anEntry->GetRun(), GetDetName(iDet)));
1824 fFirstUnprocessed[iDet] = kFALSE;
// Process the selected runs and log the overall outcome.
1832 if (!RetrieveConditionsData(shuttleLogbookEntries))
1834 Log("SHUTTLE", "Collect - Process of at least one run failed");
1838 Log("SHUTTLE", "Collect - Requested run(s) successfully processed");
1842 //______________________________________________________________________________________________
// Calls Process() for every logbook entry in dateEntries and removes the "*.shuttle" files
// from the Shuttle temp directory.
// Returns kTRUE only if hasError is still kFALSE at the end.
1843 Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries)
1845 // Retrieve conditions data for all runs that aren't processed yet
1847 Bool_t hasError = kFALSE;
1849 TIter iter(&dateEntries);
1850 AliShuttleLogbookEntry* anEntry;
// The entries are cast without a type check, so dateEntries is expected to hold only
// AliShuttleLogbookEntry objects.
1852 while ((anEntry = (AliShuttleLogbookEntry*) iter.Next())){
// presumably hasError is set when Process fails — the statement is not visible here, TODO confirm
1853 if (!Process(anEntry)){
1857 // clean SHUTTLE temp directory
// NOTE(review): it is not visible here whether this cleanup runs after every entry or once
// after the loop — confirm.
1858 TString command = Form("rm -f %s/*.shuttle", GetShuttleTempDir());
1859 gSystem->Exec(command.Data());
1862 return hasError == kFALSE;
1865 //______________________________________________________________________________________________
1866 ULong_t AliShuttle::GetTimeOfLastAction() const
1870 fMonitoringMutex->Lock();
1872 tmp = fLastActionTime;
1874 fMonitoringMutex->UnLock();
1879 //______________________________________________________________________________________________
// Accessor for the description of the last action. It takes fMonitoringMutex, the mutex that
// SetLastAction holds while writing fLastAction.
1880 const TString AliShuttle::GetLastAction() const
1882 // returns a string description of the last action
// The statements executed while the mutex is held are not visible here; presumably a copy
// of fLastAction is taken and returned after unlocking — TODO confirm.
1886 fMonitoringMutex->Lock();
1890 fMonitoringMutex->UnLock();
1895 //______________________________________________________________________________________________
1896 void AliShuttle::SetLastAction(const char* action)
1898 // updates the monitoring variables
1900 fMonitoringMutex->Lock();
1902 fLastAction = action;
1903 fLastActionTime = time(0);
1905 fMonitoringMutex->UnLock();
1908 //______________________________________________________________________________________________
// Looks up a named run parameter in the logbook entry of the run being processed.
// param: name of the parameter, passed unchanged to the logbook entry
1909 const char* AliShuttle::GetRunParameter(const char* param)
1911 // returns run parameter read from DAQ logbook
// Without a logbook entry an error is reported.
// NOTE(review): the value returned on this path is not visible here — confirm before relying on it.
1913 if(!fLogbookEntry) {
1914 AliError("No logbook entry!");
1918 return fLogbookEntry->GetRunParameter(param);
1921 //______________________________________________________________________________________________
1922 Bool_t AliShuttle::SendMail()
1924 // sends a mail to the subdetector expert in case of preprocessor error
1926 void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
1929 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
1931 AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
1936 gSystem->FreeDirectory(dir);
1939 TString bodyFileName;
1940 bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
1941 gSystem->ExpandPathName(bodyFileName);
1944 mailBody.open(bodyFileName, ofstream::out);
1946 if (!mailBody.is_open())
1948 AliError(Form("Could not open mail body file %s", bodyFileName.Data()));
1953 TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
1954 TObjString *anExpert=0;
1955 while ((anExpert = (TObjString*) iterExperts.Next()))
1957 to += Form("%s,", anExpert->GetName());
1959 to.Remove(to.Length()-1);
1960 AliDebug(2, Form("to: %s",to.Data()));
1962 // TODO this will be removed...
1963 if (to.Contains("not_yet_set")) {
1964 AliInfo("List of detector responsibles not yet set!");
1968 TString cc="alberto.colla@cern.ch";
1970 TString subject = Form("%s Shuttle preprocessor error in run %d !",
1971 fCurrentDetector.Data(), GetCurrentRun());
1972 AliDebug(2, Form("subject: %s", subject.Data()));
1974 TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
1975 body += Form("SHUTTLE just detected that your preprocessor "
1976 "exited with ERROR state in run %d!!\n\n", GetCurrentRun());
1977 body += Form("Please check %s status on the web page asap!\n\n", fCurrentDetector.Data());
1978 body += Form("The last 10 lines of %s log file are following:\n\n");
1980 AliDebug(2, Form("Body begin: %s", body.Data()));
1982 mailBody << body.Data();
1984 mailBody.open(bodyFileName, ofstream::out | ofstream::app);
1986 TString logFileName = Form("%s/%s_%d.log", GetShuttleLogDir(), fCurrentDetector.Data(), GetCurrentRun());
1987 TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
1988 if (gSystem->Exec(tailCommand.Data()))
1990 mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
1993 TString endBody = Form("------------------------------------------------------\n\n");
1994 endBody += Form("In case of problems please contact the SHUTTLE core team.\n\n");
1995 endBody += "Please do not answer this message directly, it is automatically generated.\n\n";
1996 endBody += "Sincerely yours,\n\n \t\t\tthe SHUTTLE\n";
1998 AliDebug(2, Form("Body end: %s", endBody.Data()));
2000 mailBody << endBody.Data();
2005 TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
2009 bodyFileName.Data());
2010 AliDebug(2, Form("mail command: %s", mailCommand.Data()));
2012 Bool_t result = gSystem->Exec(mailCommand.Data());