1 /**************************************************************************
2 * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
4 * Author: The ALICE Off-line Project. *
5 * Contributors are mentioned in the code where appropriate. *
7 * Permission to use, copy, modify and distribute this software and its *
8 * documentation strictly for non-commercial purposes is hereby granted *
9 * without fee, provided that the above copyright notice appears in all *
10 * copies and that both the copyright notice and this permission notice *
11 * appear in the supporting documentation. The authors make no claims *
12 * about the suitability of this software for any purpose. It is *
13 * provided "as is" without express or implied warranty. *
14 **************************************************************************/
18 Revision 1.32 2007/02/28 10:41:56 acolla
19 Run type field added in SHUTTLE framework. Run type is read from "run type" logbook and retrieved by
20 AliPreprocessor::GetRunType() function.
21 Added some ldap definition files.
23 Revision 1.30 2007/02/13 11:23:21 acolla
24 Moved getters and setters of Shuttle's main OCDB/Reference, local
25 OCDB/Reference, temp and log folders to AliShuttleInterface
27 Revision 1.27 2007/01/30 17:52:42 jgrosseo
28 adding monalisa monitoring
30 Revision 1.26 2007/01/23 19:20:03 acolla
31 Removed old ldif files, added TOF, MCH ldif files. Added some options in
32 AliShuttleConfig::Print. Added in Ali Shuttle: SetShuttleTempDir and
35 Revision 1.25 2007/01/15 19:13:52 acolla
36 Moved some AliInfo to AliDebug in SendMail function
38 Revision 1.21 2006/12/07 08:51:26 jgrosseo
40 table, db names in ldap configuration
41 added GRP preprocessor
42 DCS data can also be retrieved by data point
44 Revision 1.20 2006/11/16 16:16:48 jgrosseo
45 introducing strict run ordering flag
46 removed giving preprocessor name to preprocessor, they have to know their name themselves ;-)
48 Revision 1.19 2006/11/06 14:23:04 jgrosseo
49 major update (Alberto)
50 o) reading of run parameters from the logbook
51 o) online offline naming conversion
52 o) standalone DCSclient package
54 Revision 1.18 2006/10/20 15:22:59 jgrosseo
55 o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
56 o) Merging Collect, CollectAll, CollectNew function
57 o) Removing implementation of empty copy constructors (declaration still there!)
59 Revision 1.17 2006/10/05 16:20:55 jgrosseo
60 adapting to new CDB classes
62 Revision 1.16 2006/10/05 15:46:26 jgrosseo
63 applying to the new interface
65 Revision 1.15 2006/10/02 16:38:39 jgrosseo
68 storing of objects that failed to be stored to the grid before
69 interfacing of shuttle status table in daq system
71 Revision 1.14 2006/08/29 09:16:05 jgrosseo
74 Revision 1.13 2006/08/15 10:50:00 jgrosseo
75 effc++ corrections (alberto)
77 Revision 1.12 2006/08/08 14:19:29 jgrosseo
78 Update to shuttle classes (Alberto)
80 - Possibility to set the full object's path in the Preprocessor's and
81 Shuttle's Store functions
82 - Possibility to extend the object's run validity in the same classes
83 ("startValidity" and "validityInfinite" parameters)
84 - Implementation of the StoreReferenceData function to store reference
85 data in a dedicated CDB storage.
87 Revision 1.11 2006/07/21 07:37:20 jgrosseo
88 last run is stored after each run
90 Revision 1.10 2006/07/20 09:54:40 jgrosseo
91 introducing status management: The processing per subdetector is divided into several steps,
92 after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
93 can keep track of the number of failures and skips further processing after a certain threshold is
94 exceeded. These thresholds can be configured in LDAP.
96 Revision 1.9 2006/07/19 10:09:55 jgrosseo
97 new configuration, accesst to DAQ FES (Alberto)
99 Revision 1.8 2006/07/11 12:44:36 jgrosseo
100 adding parameters for extended validity range of data produced by preprocessor
102 Revision 1.7 2006/07/10 14:37:09 jgrosseo
103 small fix + todo comment
105 Revision 1.6 2006/07/10 13:01:41 jgrosseo
106 enhanced storing of last sucessfully processed run (alberto)
108 Revision 1.5 2006/07/04 14:59:57 jgrosseo
109 revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
111 Revision 1.4 2006/06/12 09:11:16 jgrosseo
112 coding conventions (Alberto)
114 Revision 1.3 2006/06/06 14:26:40 jgrosseo
115 o) removed files that were moved to STEER
116 o) shuttle updated to follow the new interface (Alberto)
118 Revision 1.2 2006/03/07 07:52:34 hristov
119 New version (B.Yordanov)
121 Revision 1.6 2005/11/19 17:19:14 byordano
122 RetrieveDATEEntries and RetrieveConditionsData added
124 Revision 1.5 2005/11/19 11:09:27 byordano
125 AliShuttle declaration added
127 Revision 1.4 2005/11/17 17:47:34 byordano
128 TList changed to TObjArray
130 Revision 1.3 2005/11/17 14:43:23 byordano
133 Revision 1.1.1.1 2005/10/28 07:33:58 hristov
134 Initial import as subdirectory in AliRoot
136 Revision 1.2 2005/09/13 08:41:15 byordano
137 default startTime endTime added
139 Revision 1.4 2005/08/30 09:13:02 byordano
142 Revision 1.3 2005/08/29 21:15:47 byordano
148 // This class is the main manager for AliShuttle.
149 // It organizes the data retrieval from DCS and calls the
150 // interface methods of AliPreprocessor.
151 // For every detector in AliShuttleConfig (see AliShuttleConfig),
152 // data for its set of aliases is retrieved. If there is a registered
153 // AliPreprocessor for this detector then it will be used
154 // according to the schema (see AliPreprocessor).
155 // If there isn't a registered AliPreprocessor then the retrieved
156 // data is stored automatically to the underlying AliCDBStorage.
157 // The alias name is used for detSpec.
160 #include "AliShuttle.h"
162 #include "AliCDBManager.h"
163 #include "AliCDBStorage.h"
164 #include "AliCDBId.h"
165 #include "AliCDBRunRange.h"
166 #include "AliCDBPath.h"
167 #include "AliCDBEntry.h"
168 #include "AliShuttleConfig.h"
169 #include "DCSClient/AliDCSClient.h"
171 #include "AliPreprocessor.h"
172 #include "AliShuttleStatus.h"
173 #include "AliShuttleLogbookEntry.h"
178 #include <TTimeStamp.h>
179 #include <TObjString.h>
180 #include <TSQLServer.h>
181 #include <TSQLResult.h>
185 #include <TMonaLisaWriter.h>
189 #include <sys/types.h>
190 #include <sys/wait.h>
// Static switch read by ProcessCurrentDetector(): when it is false that function
// logs "Skipping DCS processing!". It is initialised to kTRUE here.
194 Bool_t AliShuttle::fgkProcessDCS(kTRUE);
196 //______________________________________________________________________________________________
// Constructor.
// Of the member initialiser list only fTimeout(timeout) and fRetries(retries) are
// visible in this excerpt; the remaining initialisers are not shown.
197 AliShuttle::AliShuttle(const AliShuttleConfig* config,
198 UInt_t timeout, Int_t retries):
200 fTimeout(timeout), fRetries(retries),
211 // config: AliShuttleConfig used
212 // timeout: timeout used for AliDCSClient connection
213 // retries: the number of retries in case of connection error.
// An invalid configuration is reported through AliFatal.
// NOTE(review): fConfig is dereferenced here with no null check visible in this excerpt.
216 if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
// Four slots are set up per iteration (indices 0..3); QueryShuttleLogbook() uses index 3
// of fServer. Part of this loop body is not visible here.
217 for(int iSys=0;iSys<4;iSys++) {
220 fFXSlist[iSys].SetOwner(kTRUE);
// The preprocessor map is flagged as owner of what it holds.
222 fPreprocessorMap.SetOwner(kTRUE);
// Every detector's "first unprocessed" flag starts out as kFALSE.
224 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
225 fFirstUnprocessed[iDet] = kFALSE;
// Released again in the destructor.
227 fMonitoringMutex = new TMutex();
230 //______________________________________________________________________________________________
// Destructor: empties the preprocessor map with DeleteAll(), closes and deletes the
// four fServer connections, and deletes fMonitoringMutex.
231 AliShuttle::~AliShuttle()
235 fPreprocessorMap.DeleteAll();
236 for(int iSys=0;iSys<4;iSys++)
// NOTE(review): the line(s) between the for statement and Close() are not visible in this
// excerpt; confirm that fServer[iSys] is checked for null before it is dereferenced.
238 fServer[iSys]->Close();
239 delete fServer[iSys];
// The mutex pointer is reset to 0 after deletion.
248 if (fMonitoringMutex)
250 delete fMonitoringMutex;
251 fMonitoringMutex = 0;
255 //______________________________________________________________________________________________
256 void AliShuttle::RegisterPreprocessor(AliPreprocessor* preprocessor)
259 // Registers a new AliPreprocessor.
260 // It uses GetName() as the identifier of the preprocessor.
261 // The preprocessor is registered if there isn't any other
262 // with the same identifier (GetName()).
// NOTE(review): preprocessor is dereferenced here without a null check in the visible code.
265 const char* detName = preprocessor->GetName();
// A name for which GetDetPos() returns a negative value is reported through AliFatal.
266 if(GetDetPos(detName) < 0)
267 AliFatal(Form("********** !!!!! Invalid detector name: %s !!!!! **********", detName));
// A second registration under the same name only produces a warning here; the rest of
// this branch is not visible in this excerpt (presumably it returns -- confirm).
269 if (fPreprocessorMap.GetValue(detName)) {
270 AliWarning(Form("AliPreprocessor %s is already registered!", detName));
// The map key is a freshly allocated TObjString holding the detector name.
274 fPreprocessorMap.Add(new TObjString(detName), preprocessor);
276 //______________________________________________________________________________________________
// Parameters are forwarded unchanged to StoreLocally() together with fgkLocalCDB as the
// local storage URI; the return value is whatever StoreLocally() returns.
277 Bool_t AliShuttle::Store(const AliCDBPath& path, TObject* object,
278 AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite)
280 // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for
281 // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored
282 // using this function. Use StoreReferenceData instead!
283 // It calls the StoreLocally function, which temporarily stores the data locally; when the preprocessor
284 // finishes, the data are transferred to the main storage (Grid).
286 return StoreLocally(fgkLocalCDB, path, object, metaData, validityStart, validityInfinite);
290 //______________________________________________________________________________________________
// Forwards to StoreLocally() with fgkLocalRefStorage as the local storage URI; the
// validity arguments are not passed, so StoreLocally()'s defaults apply.
291 Bool_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData)
293 // Stores a CDB object in the storage for reference data. These objects will not be available during
294 // offline reconstruction. Use this function for reference data only!
295 // It calls the StoreLocally function, which temporarily stores the data locally; when the preprocessor
296 // finishes, the data are transferred to the main storage (Grid).
298 return StoreLocally(fgkLocalRefStorage, path, object, metaData);
302 //______________________________________________________________________________________________
303 Bool_t AliShuttle::StoreLocally(const TString& localUri,
304 const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
305 Int_t validityStart, Bool_t validityInfinite)
307 // Stores the object temporarily in local storage. Parameters are passed by the Store and StoreReferenceData functions.
308 // When the preprocessor finishes, the data are transferred to the main storage (Grid).
309 // The parameters are:
310 // 1) Uri of the backup storage (Local)
311 // 2) the object's path.
312 // 3) the object to be stored
313 // 4) the metaData to be associated with the object
314 // 5) the validity start run number w.r.t. the current run,
315 // if the data is valid only for this run leave the default 0
316 // 6) specifies if the calibration data is valid for infinity (this means until updated),
317 // typical for calibration runs, the default is kFALSE
319 // returns 0 if fail, 1 otherwise
// Label used only in log messages: "CDB" for fgkLocalCDB, "Reference" for any other URI.
321 const char* cdbType = (localUri == fgkLocalCDB) ? "CDB" : "Reference";
// validityStart is counted backwards from the current run.
323 Int_t firstRun = GetCurrentRun() - validityStart;
325 AliError("First valid run happens to be less than 0! Setting it to 0.");
// Last valid run: infinity when requested, otherwise the current run.
330 if(validityInfinite) {
331 lastRun = AliCDBRunRange::Infinity();
333 lastRun = GetCurrentRun();
336 // Version is set to current run, it will be used later to transfer data to Grid
337 AliCDBId id(path, firstRun, lastRun, GetCurrentRun(), -1);
// Record the current run under "RunUsed(TObjString)" unless that property already holds
// a TObjString.
// NOTE(review): metaData is dereferenced here with no null check in the visible code.
339 if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){
340 TObjString runUsed = Form("%d", GetCurrentRun());
341 metaData->SetProperty("RunUsed(TObjString)", runUsed.Clone());
344 Bool_t result = kFALSE;
// The storage for localUri is looked up once for the check and once more for the Put().
346 if (!(AliCDBManager::Instance()->GetStorage(localUri))) {
347 Log("SHUTTLE", Form("StoreLocally - Cannot activate local %s storage", cdbType));
349 result = AliCDBManager::Instance()->GetStorage(localUri)
350 ->Put(object, id, metaData);
355 Log("SHUTTLE", Form("StoreLocally - Can't store %s data!", fCurrentDetector.Data()));
361 //______________________________________________________________________________________________
362 Bool_t AliShuttle::StoreOCDB()
364 // Called when preprocessor ends successfully or when previous storage attempt failed (kStoreError status)
365 // Calls underlying StoreOCDB(const TString&) function twice, for OCDB and Reference storage.
// Both transfers are always attempted: each result is captured in its own variable before
// the two are combined, so a failed OCDB transfer does not skip the reference transfer.
367 AliInfo("Storing OCDB data ...");
368 Bool_t resultCDB = StoreOCDB(fgkMainCDB);
370 AliInfo("Storing reference data ...");
371 Bool_t resultRef = StoreOCDB(fgkMainRefStorage);
// kTRUE only if both transfers reported success.
373 return resultCDB && resultRef;
376 //______________________________________________________________________________________________
377 Bool_t AliShuttle::StoreOCDB(const TString& gridURI)
380 // Called by StoreOCDB(), performs actual storage to the main OCDB and reference storages (Grid)
// gridURI must equal fgkMainCDB or fgkMainRefStorage; any other value is reported as an
// invalid storage URI. Several lines of this function are not visible in this excerpt.
383 TObjArray* gridIds=0;
385 Bool_t result = kTRUE;
387 const char* type = 0;
// Select the local storage that pairs with the requested Grid storage.
389 if(gridURI == fgkMainCDB) {
391 localURI = fgkLocalCDB;
392 } else if(gridURI == fgkMainRefStorage) {
394 localURI = fgkLocalRefStorage;
396 AliError(Form("Invalid storage URI: %s", gridURI.Data()));
400 AliCDBManager* man = AliCDBManager::Instance();
402 AliCDBStorage *gridSto = man->GetStorage(gridURI);
405 Form("StoreOCDB - cannot activate main %s storage", type));
// Ids taken from the Grid storage's query list; Process() calls QueryCDB() on the main
// storages before looping over the detectors.
409 gridIds = gridSto->GetQueryCDBList();
411 // get objects previously stored in local CDB
412 AliCDBStorage *localSto = man->GetStorage(localURI);
415 Form("StoreOCDB - cannot activate local %s storage", type));
// Wildcard path covering everything under the current detector's offline name.
418 AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*");
419 // Local objects were stored with current run as Grid version!
420 TList* localEntries = localSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun());
// NOTE(review): localEntries is used without a null check in the visible code.
421 localEntries->SetOwner(1);
423 // loop on local stored objects
424 TIter localIter(localEntries);
425 AliCDBEntry *aLocEntry = 0;
426 while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){
427 aLocEntry->SetOwner(1);
// aLocId is a copy taken before version/subversion are reset to -1 on the entry, so it
// still carries the values the entry was read with.
428 AliCDBId aLocId = aLocEntry->GetId();
429 aLocEntry->SetVersion(-1);
430 aLocEntry->SetSubVersion(-1);
432 // If local object is valid up to infinity we store it only if it is
433 // the first unprocessed run!
434 if (aLocId.GetLastRun() == AliCDBRunRange::Infinity() &&
435 !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
437 Log("SHUTTLE", Form("StoreOCDB - %s: object %s has validity infinite but "
438 "there are previous unprocessed runs!",
439 fCurrentDetector.Data(), aLocId.GetPath().Data()));
443 // loop on Grid valid Id's
444 Bool_t store = kTRUE;
445 TIter gridIter(gridIds);
446 AliCDBId* aGridId = 0;
447 while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
448 if(aGridId->GetPath() != aLocId.GetPath()) continue;
449 // skip all objects valid up to infinity
450 if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
451 // if we get here, it means there's already some more recent object stored on Grid!
456 // If we get here, the file can be stored!
// NOTE(review): as visible here, Put() is issued before `store` is consulted, so an object
// for which a more recent Grid version was found would still be sent. Confirm against the
// lines missing from this excerpt whether that is intended.
457 Bool_t storeOk = gridSto->Put(aLocEntry);
// This branch handles both "superseded on the Grid" (!store) and "stored successfully".
458 if(!store || storeOk){
462 Log(fCurrentDetector.Data(),
463 Form("StoreOCDB - A more recent object already exists in %s storage: <%s>",
464 type, aGridId->ToString().Data()));
467 Form("StoreOCDB - Object <%s> successfully put into %s storage",
468 aLocId.ToString().Data(), type));
471 // removing local filename...
473 localSto->IdToFilename(aLocId, filename);
474 AliInfo(Form("Removing local file %s", filename.Data()));
475 RemoveFile(filename.Data());
479 Form("StoreOCDB - Grid %s storage of object <%s> failed",
480 type, aLocId.ToString().Data()));
484 localEntries->Clear();
489 //______________________________________________________________________________________________
490 void AliShuttle::CleanLocalStorage(const TString& uri)
492 // Called in case the preprocessor is declared failed. Remove remaining objects from the local storages.
// uri must equal fgkLocalCDB or fgkLocalRefStorage; any other value is reported as an
// invalid storage URI.
494 const char* type = 0;
495 if(uri == fgkLocalCDB) {
497 } else if(uri == fgkLocalRefStorage) {
500 AliError(Form("Invalid storage URI: %s", uri.Data()));
504 AliCDBManager* man = AliCDBManager::Instance();
506 // open local storage
507 AliCDBStorage *localSto = man->GetStorage(uri);
510 Form("CleanLocalStorage - cannot activate local %s storage", type));
// Wildcard pattern <base folder>/<detector>/*/Run*_v<current run>_s*.root. StoreLocally()
// passes the current run as the version argument of the AliCDBId, which is what the
// _v<run> part matches.
514 TString filename(Form("%s/%s/*/Run*_v%d_s*.root",
515 localSto->GetBaseFolder().Data(), fCurrentDetector.Data(), GetCurrentRun()));
517 AliInfo(Form("filename = %s", filename.Data()));
519 AliInfo(Form("Removing remaining local files from run %d and detector %s ...",
520 GetCurrentRun(), fCurrentDetector.Data()));
// The pattern is handed to RemoveFile(), which inserts it into an "rm -f" command.
522 RemoveFile(filename.Data());
526 //______________________________________________________________________________________________
527 void AliShuttle::RemoveFile(const char* filename)
529 // removes local file
// filename is inserted verbatim into the command string; CleanLocalStorage() relies on
// this to pass a wildcard pattern.
// NOTE(review): the name is neither quoted nor escaped, so whitespace or shell
// metacharacters in it would change the command -- presumably Exec() runs it via a shell.
531 TString command(Form("rm -f %s", filename));
533 Int_t result = gSystem->Exec(command.Data());
// Failure is only logged; nothing is returned to the caller (void).
536 Log("SHUTTLE", Form("RemoveFile - %s: Cannot remove file %s!",
537 fCurrentDetector.Data(), filename));
541 //______________________________________________________________________________________________
542 AliShuttleStatus* AliShuttle::ReadShuttleStatus()
544 // Reads the AliShuttleStatus from the CDB
// The entry is fetched from /SHUTTLE/STATUS/<current detector> for the current run and
// kept in fStatusEntry.
// NOTE(review): the storage returned by GetStorage() is dereferenced without a null check
// in the visible code.
551 fStatusEntry = AliCDBManager::Instance()->GetStorage(GetLocalCDB())
552 ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
// No entry found: 0 is returned.
554 if (!fStatusEntry) return 0;
555 fStatusEntry->SetOwner(1);
// The stored object is expected to be an AliShuttleStatus; the dynamic_cast yields 0
// otherwise.
557 AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
559 AliError("Invalid object stored to CDB!");
566 //______________________________________________________________________________________________
567 Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status)
569 // writes the status for one subdetector
576 Int_t run = GetCurrentRun();
// The id is SHUTTLE/STATUS/<current detector>, valid for exactly the current run.
578 AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
// fStatusEntry is replaced by a new entry wrapping `status` and fresh metadata, and is
// flagged as owner.
// NOTE(review): what happens to a previous fStatusEntry is not visible in this excerpt.
580 fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
581 fStatusEntry->SetOwner(1);
// NOTE(review): the Put() result is held in a UInt_t here, whereas StoreLocally() assigns
// Put() to a Bool_t.
583 UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
586 Log("SHUTTLE", Form("WriteShuttleStatus - Failed for %s, run %d",
587 fCurrentDetector.Data(), run));
596 //______________________________________________________________________________________________
// newStatus: status to set on the AliShuttleStatus held by fStatusEntry.
// increaseCount: when true, the status object's count is incremented as well.
597 void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount)
599 // changes the AliShuttleStatus for the given detector and run to the given status
602 AliError("UNEXPECTED: fStatusEntry empty");
606 AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
609 Log("SHUTTLE", "UNEXPECTED: status could not be read from current CDB entry");
// The transition text is both logged and recorded through SetLastAction().
613 TString actionStr = Form("UpdateShuttleStatus - %s: Changing state from %s to %s",
614 fCurrentDetector.Data(),
615 status->GetStatusName(),
616 status->GetStatusName(newStatus));
617 Log("SHUTTLE", actionStr);
618 SetLastAction(actionStr);
620 status->SetStatus(newStatus);
621 if (increaseCount) status->IncreaseCount();
// The modified entry is written back to the local CDB; the result of Put() is not
// examined on this line.
623 AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
628 //______________________________________________________________________________________________
629 void AliShuttle::SendMLInfo()
632 // sends ML information about the current status of the current detector being processed
// NOTE(review): fStatusEntry is dereferenced here; any null check would be on lines that
// are not visible in this excerpt.
635 AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
638 Log("SHUTTLE", "SendMLInfo - UNEXPECTED: status could not be read from current CDB entry");
// Two parameters are sent: "<detector>_status" (status name) and "<detector>_count"
// (the status object's count).
642 TMonaLisaText mlStatus(Form("%s_status", fCurrentDetector.Data()), status->GetStatusName());
643 TMonaLisaValue mlRetryCount(Form("%s_count", fCurrentDetector.Data()), status->GetCount());
// mlList is declared on a line that is not visible here; it receives the addresses of the
// two local objects above.
646 mlList.Add(&mlStatus);
647 mlList.Add(&mlRetryCount);
649 fMonaLisa->SendParameters(&mlList);
652 //______________________________________________________________________________________________
653 Bool_t AliShuttle::ContinueProcessing()
655 // this function reads the AliShuttleStatus information from CDB and
656 // checks if the processing should be continued
657 // if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
// Many lines of this function (conditions, returns, braces) are not visible in this
// excerpt; the comments below describe only what the visible statements do.
// Detectors that the configuration says are not processed on this host are skipped.
659 if (!fConfig->HostProcessDetector(fCurrentDetector)) return kFALSE;
661 AliPreprocessor* aPreprocessor =
662 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
665 AliInfo(Form("%s: no preprocessor registered", fCurrentDetector.Data()));
669 AliShuttleLogbookEntry::Status entryStatus =
670 fLogbookEntry->GetDetectorStatus(fCurrentDetector);
672 if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
673 AliInfo(Form("ContinueProcessing - %s is %s",
674 fCurrentDetector.Data(),
675 fLogbookEntry->GetDetectorStatusName(entryStatus)));
679 // if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state
681 // check if current run is first unprocessed run for current detector
682 if (fConfig->StrictRunOrder(fCurrentDetector) &&
683 !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
// NOTE(review): the format string below contains a %s conversion but Form() receives no
// argument for it, which is undefined behaviour for printf-style formatting. The other
// messages in this function pass fCurrentDetector.Data() for the leading %s.
685 Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering but this is not the first unprocessed run!"));
689 AliShuttleStatus* status = ReadShuttleStatus();
// No stored status: a new kStarted status is written and the write result is returned.
692 Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
693 fCurrentDetector.Data()));
694 status = new AliShuttleStatus(AliShuttleStatus::kStarted);
695 return WriteShuttleStatus(status);
698 // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
699 // If it happens it may mean Logbook updating failed... let's do it now!
700 if (status->GetStatus() == AliShuttleStatus::kDone ||
701 status->GetStatus() == AliShuttleStatus::kFailed){
702 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
703 fCurrentDetector.Data(),
704 status->GetStatusName(status->GetStatus())));
705 UpdateShuttleLogbook(fCurrentDetector.Data(),
706 status->GetStatusName(status->GetStatus()));
// A previous Grid storage failure: the messages below cover a retry that either succeeds
// (status and logbook set to done) or fails again.
710 if (status->GetStatus() == AliShuttleStatus::kStoreError) {
712 Form("ContinueProcessing - %s: Grid storage of one or more objects failed. Trying again now",
713 fCurrentDetector.Data()));
715 Log("SHUTTLE", Form("ContinueProcessing - %s: all objects successfully stored into main storage",
716 fCurrentDetector.Data()));
717 UpdateShuttleStatus(AliShuttleStatus::kDone);
718 UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE");
721 Form("ContinueProcessing - %s: Grid storage failed again",
722 fCurrentDetector.Data()));
723 // trigger ML information manually because we did not have a status change
729 // if we get here, there is a restart
730 Bool_t cont = kFALSE;
// Retry budget: once the stored count reaches the configured maximum the detector is
// marked FAILED in both the logbook and the status object, and local leftovers are removed.
733 if (status->GetCount() >= fConfig->GetMaxRetries()) {
734 Log("SHUTTLE", Form("ContinueProcessing - %s failed %d times in status %s - "
735 "Updating Shuttle Logbook", fCurrentDetector.Data(),
736 status->GetCount(), status->GetStatusName()));
737 UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
738 UpdateShuttleStatus(AliShuttleStatus::kFailed);
740 // there may still be objects in local OCDB and reference storage
741 // and FXS databases may be not updated: do it now!
742 CleanLocalStorage(fgkLocalCDB);
743 CleanLocalStorage(fgkLocalRefStorage);
744 UpdateTableFailCase();
// Otherwise the status goes back to kStarted with the count incremented (second argument
// kTRUE).
747 Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. "
748 "Aborted before with %s. Retry number %d.", fCurrentDetector.Data(),
749 status->GetStatusName(), status->GetCount()));
750 UpdateShuttleStatus(AliShuttleStatus::kStarted, kTRUE);
754 // Send mail to detector expert!
755 AliInfo(Form("Sending mail to %s expert...", fCurrentDetector.Data()));
757 Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert",
758 fCurrentDetector.Data()));
763 //______________________________________________________________________________________________
764 Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
767 // Makes data retrieval for all detectors in the configuration.
768 // entry: Shuttle logbook entry, contains run parameters and status of detectors
769 // (Unprocessed, Inactive, Failed or Done).
770 // Returns kFALSE in case an error occurred and kTRUE otherwise
// Many lines of this function (fork call, branches, braces) are not visible in this
// excerpt; the comments below describe only what the visible statements do.
773 if(!entry) return kFALSE;
775 fLogbookEntry = entry;
// A run that the logbook entry already reports as done only gets its logbook flag updated.
777 if (fLogbookEntry->IsDone())
779 Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook");
780 UpdateShuttleLogbook("shuttle_done");
785 // create ML instance that monitors this run
786 fMonaLisa = new TMonaLisaWriter(Form("%d", GetCurrentRun()), "SHUTTLE", "aliendb1.cern.ch");
787 // disable monitoring of other parameters that come e.g. from TFile
788 gMonitoringWriter = 0;
790 AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^* \n",
793 // Set run type from run type logbook into current fLogbookEntry
796 // Send the information to ML
797 TMonaLisaText mlStatus("SHUTTLE_status", "Processing");
800 mlList.Add(&mlStatus);
802 fMonaLisa->SendParameters(&mlList);
804 fLogbookEntry->Print("all");
// Accumulates failure across detectors; the function returns hasError == kFALSE.
807 Bool_t hasError = kFALSE;
// Query both main storages for the current run up front (skipped for a storage that
// cannot be obtained). StoreOCDB(const TString&) later reads the resulting id list.
809 AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
810 if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
811 AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
812 if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
814 // Loop on detectors in the configuration
815 TIter iter(fConfig->GetDetectors());
816 TObjString* aDetector = 0;
818 while ((aDetector = (TObjString*) iter.Next()))
820 fCurrentDetector = aDetector->String();
// ContinueProcessing() decides per detector whether anything is to be done for this run.
822 if (ContinueProcessing() == kFALSE) continue;
824 AliInfo(Form("\n\n \t\t\t****** run %d - %s: START ******",
825 GetCurrentRun(), aDetector->GetName()));
827 for(Int_t iSys=0;iSys<3;iSys++) fFXSCalled[iSys]=kFALSE;
829 Log(fCurrentDetector.Data(), "Starting processing");
// The fork call itself is on a line not visible here; the messages below distinguish a
// failed fork, the parent process and the client (child) process.
835 Log("SHUTTLE", "ERROR: Forking failed");
840 AliInfo(Form("In parent process of %d - %s: Starting monitoring",
841 GetCurrentRun(), aDetector->GetName()));
843 Long_t begin = time(0);
// Parent: poll the child without blocking (WNOHANG) until it has terminated.
845 int status; // to be used with waitpid, on purpose an int (not Int_t)!
846 while (waitpid(pid, &status, WNOHANG) == 0)
848 Long_t expiredTime = time(0) - begin;
// Elapsed time is compared with the configured preprocessor timeout.
850 if (expiredTime > fConfig->GetPPTimeOut())
// NOTE(review): expiredTime is a Long_t but is formatted with %d here and in the
// "Checked process" message below; the width can mismatch on platforms where long is
// wider than int.
852 Log("SHUTTLE", Form("%s: Process time out. Run time: %d seconds. Killing...",
853 fCurrentDetector.Data(), expiredTime));
857 UpdateShuttleStatus(AliShuttleStatus::kPPTimeOut);
860 gSystem->Sleep(1000);
// Progress line whenever the elapsed seconds are a multiple of 60.
864 if (expiredTime % 60 == 0)
865 Log("SHUTTLE", Form("Checked process. Run time: %d seconds.",
867 gSystem->Sleep(1000);
871 AliInfo(Form("In parent process of %d - %s: Client has terminated.",
872 GetCurrentRun(), aDetector->GetName()));
// Only a normal exit provides a return code; a non-zero code marks the run as having an
// error.
874 if (WIFEXITED(status))
876 Int_t returnCode = WEXITSTATUS(status);
878 Log("SHUTTLE", Form("%s: the return code is %d", fCurrentDetector.Data(),
881 if (returnCode != 0) hasError = kTRUE;
// Client (child) side: run the detector's processing and exit with its return code.
887 AliInfo(Form("In client process of %d - %s", GetCurrentRun(), aDetector->GetName()));
889 UInt_t returnCode = ProcessCurrentDetector(); // 0 means success
892 if (returnCode > 0) // TODO: returnCode>0 means Preprocessor ERROR! Preprocessors should follow this!
894 if (returnCode == kDCSErrCode)
896 AliInfo(Form("\n \t\t\t****** run %d - %s: DCS ERROR ****** \n\n",
897 GetCurrentRun(), aDetector->GetName()));
900 AliInfo(Form("\n \t\t\t****** run %d - %s: PREPROCESSOR ERROR ****** \n\n",
901 GetCurrentRun(), aDetector->GetName()));
903 } else { // Preprocessor finished successfully!
905 // Update time_processed field in FXS DB
906 if (UpdateTable() == kFALSE)
// NOTE(review): the format string below contains a %s conversion but Form() receives no
// argument for it, which is undefined behaviour for printf-style formatting. The detector
// name (fCurrentDetector.Data()) is presumably what was intended.
907 Log("SHUTTLE", Form("Process - %s: Could not update FXS databases!"));
909 // Transfer the data from local storage to main storage (Grid)
910 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
911 if (StoreOCDB() == kFALSE)
913 AliInfo(Form("\n \t\t\t****** run %d - %s: STORAGE ERROR ****** \n\n",
914 GetCurrentRun(), aDetector->GetName()));
915 UpdateShuttleStatus(AliShuttleStatus::kStoreError);
916 returnCode = kStorErrCode;
918 AliInfo(Form("\n \t\t\t****** run %d - %s: DONE ****** \n\n",
919 GetCurrentRun(), aDetector->GetName()));
920 UpdateShuttleStatus(AliShuttleStatus::kDone);
// File lists of the systems that were queried during this detector's processing are
// emptied.
924 for (UInt_t iSys=0; iSys<3; iSys++)
926 if (fFXSCalled[iSys]) fFXSlist[iSys].Clear();
929 AliInfo(Form("Client process of %d - %s is exiting now with %d.",
930 GetCurrentRun(), aDetector->GetName(), returnCode));
932 // the client exits here
933 gSystem->Exit(returnCode);
935 AliError("We should never get here!!!");
939 AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^* \n",
942 //check if shuttle is done for this run, if so update logbook
943 TObjArray checkEntryArray;
944 checkEntryArray.SetOwner(1);
945 TString whereClause = Form("where run=%d", GetCurrentRun());
// If the logbook cannot be re-read for this run, only the accumulated error state is
// returned.
946 if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || checkEntryArray.GetEntries() == 0) {
947 Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!",
949 return hasError == kFALSE;
952 AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*>
953 (checkEntryArray.At(0));
957 if (checkEntry->IsDone())
959 Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook");
960 UpdateShuttleLogbook("shuttle_done");
// A detector still unprocessed after this run has its fFirstUnprocessed flag cleared.
964 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
966 if (checkEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
968 AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
969 checkEntry->GetRun(), GetDetName(iDet)));
970 fFirstUnprocessed[iDet] = kFALSE;
976 // remove ML instance
982 return hasError == kFALSE;
985 //______________________________________________________________________________________________
986 UInt_t AliShuttle::ProcessCurrentDetector()
989 // Makes data retrieval just for a specific detector (fCurrentDetector).
990 // There should be a configuration for this detector.
// The caller (Process) treats a return value of 0 as success and compares non-zero values
// with kDCSErrCode. Several lines of this function are not visible in this excerpt.
992 AliInfo(Form("Retrieving values for %s, run %d", fCurrentDetector.Data(), GetCurrentRun()));
997 Bool_t aDCSError = kFALSE;
// NOTE(review): aPreprocessor is dereferenced below; any null check would be on a line not
// visible in this excerpt.
1000 AliPreprocessor* aPreprocessor =
1001 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
1003 aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
1005 Bool_t processDCS = aPreprocessor->ProcessDCS();
1007 // TODO Test only... I've added a flag that allows to
1008 // exclude DCS archive DB query
// DCS is skipped when either the preprocessor does not ask for it or the static
// fgkProcessDCS flag is off.
1009 if (!processDCS || !fgkProcessDCS)
1011 AliInfo("Skipping DCS processing!");
1015 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
1017 TString host(fConfig->GetDCSHost(fCurrentDetector));
1018 Int_t port = fConfig->GetDCSPort(fCurrentDetector);
1020 // Retrieval of Aliases
1021 TObjString* anAlias = 0;
1023 Int_t nTotAliases= ((TMap*)fConfig->GetDCSAliases(fCurrentDetector))->GetEntries();
1024 TIter iterAliases(fConfig->GetDCSAliases(fCurrentDetector));
1025 while ((anAlias = (TObjString*) iterAliases.Next()))
// One owning TObjArray per alias collects the retrieved values.
1027 TObjArray *valueSet = new TObjArray();
1028 valueSet->SetOwner(1);
// NOTE(review): iAlias is incremented only inside the AliInfo() argument list, i.e. only
// when this condition is true (and only if the logging macro evaluates its arguments).
// After the first message the counter therefore stops advancing unless it already equals
// nTotAliases, so the "every 500" progress output does not fire as the condition suggests.
// The counter's declaration is on a line not visible here -- confirm.
1030 if (((iAlias-1) % 500) == 0 || iAlias == nTotAliases)
1031 AliInfo(Form("Querying DCS archive: alias %s (%d of %d)",
1032 anAlias->GetName(), iAlias++, nTotAliases));
// GetValueSet() returning 0 is treated as a DCS error.
1033 aDCSError = (GetValueSet(host, port, anAlias->String(), valueSet, kAlias) == 0);
// On success the value set is stored in dcsMap under a clone of the alias name.
1037 dcsMap.Add(anAlias->Clone(), valueSet);
1039 Log(fCurrentDetector,
1040 Form("ProcessCurrentDetector - Error while retrieving alias %s",
1041 anAlias->GetName()));
1042 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1048 // Retrieval of Data Points
1049 TObjString* aDP = 0;
1051 Int_t nTotDPs= ((TMap*)fConfig->GetDCSDataPoints(fCurrentDetector))->GetEntries();
1052 TIter iterDP(fConfig->GetDCSDataPoints(fCurrentDetector));
1053 while ((aDP = (TObjString*) iterDP.Next()))
1055 TObjArray *valueSet = new TObjArray();
1056 valueSet->SetOwner(1);
// NOTE(review): same pattern as for the aliases above -- iDP is incremented only inside
// the conditionally executed AliInfo() call.
1057 if (((iDP-1) % 500) == 0 || iDP == nTotDPs)
1058 AliInfo(Form("Querying DCS archive: DP %s (%d of %d)",
1059 aDP->GetName(), iDP++, nTotDPs));
1060 aDCSError = (GetValueSet(host, port, aDP->String(), valueSet, kDP) == 0);
1064 dcsMap.Add(aDP->Clone(), valueSet);
1066 Log(fCurrentDetector,
1067 Form("ProcessCurrentDetector - Error while retrieving data point %s",
1069 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1076 // DCS Archive DB processing successful. Call Preprocessor!
1077 UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
// The preprocessor receives the map of retrieved value sets; a result greater than 0 is
// treated as a preprocessor error.
1079 UInt_t returnValue = aPreprocessor->Process(&dcsMap);
1081 if (returnValue > 0) // Preprocessor error!
1083 UpdateShuttleStatus(AliShuttleStatus::kPPError);
1085 } else { // preprocessor ok!
// Success: status becomes kPPDone and the logbook entry for this detector is set to
// "DONE".
1086 UpdateShuttleStatus(AliShuttleStatus::kPPDone);
1087 UpdateShuttleLogbook(fCurrentDetector, "DONE");
1088 Log("SHUTTLE", Form("ProcessCurrentDetector - %s preprocessor returned success",
1089 fCurrentDetector.Data()));
1097 //______________________________________________________________________________________________
1098 Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
1101 // Query DAQ's Shuttle logbook and fills detector status object.
1102 // Call QueryRunParameters to query DAQ logbook for run parameters.
// whereClause is pasted verbatim into the SQL text built below, so it has to be
// a complete "where ..." fragment (or an empty string) coming from trusted code.
// Returns kFALSE when the logbook connection (slot 3) cannot be established.
// "entries" is switched to owning mode and receives one AliShuttleLogbookEntry per row.
1104 entries.SetOwner(1);
1106 // check connection, in case connect
1107 if(!Connect(3)) return kFALSE;
1110 sqlQuery = Form("select * from %s %s order by run", fConfig->GetShuttlelbTable(), whereClause);
1112 TSQLResult* aResult = fServer[3]->Query(sqlQuery);
// error report for a query that could not be executed
1114 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
1118 AliDebug(2,Form("Query = %s", sqlQuery.Data()));
1120 if(aResult->GetRowCount() == 0) {
1121 // if(sqlQuery.EndsWith("where shuttle_done=0 order by run")){
1122 // Log("SHUTTLE", "QueryShuttleLogbook - All runs in Shuttle Logbook are already DONE");
1126 AliInfo("No entries in Shuttle Logbook match request");
1132 // TODO Check field count!
// NOTE(review): the expected column count is hard-coded; a change of the Shuttle
// logbook table layout makes the check below reject every result.
1133 const UInt_t nCols = 22;
1134 if (aResult->GetFieldCount() != (Int_t) nCols) {
1135 AliError("Invalid SQL result field number!");
1141 while ((aRow = aResult->Next())) {
// column 0 of each row is parsed as the run number
1142 TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
1143 Int_t run = runString.Atoi();
// run parameters come from a second query (DAQ logbook) done by QueryRunParameters
1145 AliShuttleLogbookEntry *entry = QueryRunParameters(run);
1149 // loop on detectors
// every column of the row (index 0 included) is handed to the entry as (field name, value)
1150 for(UInt_t ii = 0; ii < nCols; ii++)
1151 entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii));
1153 entries.AddLast(entry);
1157 // if(sqlQuery.EndsWith("where shuttle_done=0 order by run"))
1158 // Log("SHUTTLE", Form("QueryShuttleLogbook - Found %d unprocessed runs in Shuttle Logbook",
1159 // entries.GetEntriesFast()));
1164 //______________________________________________________________________________________________
1165 AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
1168 // Retrieve run parameters written in the DAQ logbook and sets them into AliShuttleLogbookEntry object
// run: run number used in the "where run=..." clause of the DAQ logbook query.
// The query is sent through connection slot 3. Exactly one matching row is expected:
// zero rows, more than one row, or an unreadable row are each reported and skipped.
1171 // check connection, in case connect
1176 sqlQuery.Form("select * from %s where run=%d", fConfig->GetDAQlbTable(), run);
1178 TSQLResult* aResult = fServer[3]->Query(sqlQuery);
1180 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
1184 if (aResult->GetRowCount() == 0) {
1185 Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run));
1190 if (aResult->GetRowCount() > 1) {
1191 AliError(Form("More than one entry in DAQ Logbook for run %d. Skipping", run));
1196 TSQLRow* aRow = aResult->Next();
1199 AliError(Form("Could not retrieve row for run %d. Skipping", run));
1204 AliShuttleLogbookEntry* entry = new AliShuttleLogbookEntry(run);
// copy every (field name, value) pair of the row into the new entry
1206 for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
1207 entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
1209 UInt_t startTime = entry->GetStartTime();
1210 UInt_t endTime = entry->GetEndTime();
// a run is only usable with non-zero start and end times in chronological order
1212 if (!startTime || !endTime || startTime > endTime) {
// the times are UInt_t, so they are printed with %u (was %d: signed/unsigned mismatch)
1214 Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %u, endTime = %u",
1215 run, startTime, endTime));
1228 //______________________________________________________________________________________________
1229 Bool_t AliShuttle::GetValueSet(const char* host, Int_t port, const char* entry,
1230 TObjArray* valueSet, DCSType type)
1232 // Retrieve all "entry" data points from the DCS server
1233 // host, port: TSocket connection parameters
1234 // entry: name of the alias or data point
1235 // valueSet: array of retrieved AliDCSValue's
1236 // type: kAlias or kDP
// The requested time window is [GetCurrentStartTime(), GetCurrentEndTime()].
// A new AliDCSClient (using fTimeout and fRetries) is constructed on every call.
1238 AliDCSClient client(host, port, fTimeout, fRetries);
1239 if (!client.IsConnected())
// alias query
1248 result = client.GetAliasValues(entry,
1249 GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
// data point query
1253 result = client.GetDPValues(entry,
1254 GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
// failure report: generic reason first, server-side message when it is a server error
1259 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get '%s'! Reason: %s",
1260 entry, AliDCSClient::GetErrorString(result)));
1262 if (result == AliDCSClient::fgkServerError)
1264 Log(fCurrentDetector.Data(), Form("GetValueSet - Server error: %s",
1265 client.GetServerError().Data()));
1274 //______________________________________________________________________________________________
1275 const char* AliShuttle::GetFile(Int_t system, const char* detector,
1276 const char* id, const char* source)
1278 // Get calibration file from file exchange servers
1279 // First queries the FXS database for the file name, using the run, detector, id and source info
1280 // then calls RetrieveFile(filename) for actual copy to local disk
1281 // run: current run being processed (given by Logbook entry fLogbookEntry)
1282 // detector: the Preprocessor name
1283 // id: provided as a parameter by the Preprocessor
1284 // source: provided by the Preprocessor through GetFileSources function
// Returns "<Shuttle temp dir>/<local file name>" on success. The pointer refers to a
// function-local static TString, so its content is replaced by the next successful call.
// Returns 0 when the final copy/verification result is false.
// NOTE(review): detector, id and source are inserted unescaped into the SQL text.
1286 // check connection, in case connect
1287 if (!Connect(system))
1289 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
1293 // Query preparation
1294 TString sourceName(source);
1296 TString sqlQueryStart = Form("select filePath,size,fileChecksum from %s where",
1297 fConfig->GetFXSdbTable(system));
1298 TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1299 GetCurrentRun(), detector, id);
// the source column name depends on the system: DAQsource or DDLnumbers
1303 whereClause += Form(" and DAQsource=\"%s\"", source);
1305 else if (system == kDCS)
1309 else if (system == kHLT)
1311 whereClause += Form(" and DDLnumbers=\"%s\"", source);
1315 TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1317 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1320 TSQLResult* aResult = 0;
1321 aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
// NOTE(review): most messages below are prefixed "GetFileName" although the function
// is GetFile (the first and the "md5sum ... not set" messages use "GetFile").
1323 Log(detector, Form("GetFileName - Can't execute SQL query to %s database for: id = %s, source = %s",
1324 GetSystemName(system), id, sourceName.Data()));
1328 if(aResult->GetRowCount() == 0)
1331 Form("GetFileName - No entry in %s FXS db for: id = %s, source = %s",
1332 GetSystemName(system), id, sourceName.Data()));
1337 if (aResult->GetRowCount() > 1) {
1339 Form("GetFileName - More than one entry in %s FXS db for: id = %s, source = %s",
1340 GetSystemName(system), id, sourceName.Data()));
1345 if (aResult->GetFieldCount() != nFields) {
1347 Form("GetFileName - Wrong field count in %s FXS db for: id = %s, source = %s",
1348 GetSystemName(system), id, sourceName.Data()));
1353 TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
1356 Log(detector, Form("GetFileName - Empty set result in %s FXS db from query: id = %s, source = %s",
1357 GetSystemName(system), id, sourceName.Data()));
// the three selected columns, in select order: filePath, size, fileChecksum
1362 TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
1363 TString fileSize(aRow->GetField(1), aRow->GetFieldLength(1));
1364 TString fileChecksum(aRow->GetField(2), aRow->GetFieldLength(2));
1369 AliDebug(2, Form("filePath = %s; size = %s, fileChecksum = %s",
1370 filePath.Data(), fileSize.Data(), fileChecksum.Data()));
1372 // retrieved file is renamed to make it unique
1373 TString localFileName = Form("%s_%s_%d_%s_%s.shuttle",
1374 GetSystemName(system), detector, GetCurrentRun(), id, sourceName.Data());
1377 // file retrieval from FXS
1378 UInt_t nRetries = 0;
1379 UInt_t maxRetries = 3;
1380 Bool_t result = kFALSE;
1382 // copy!! if successful TSystem::Exec returns 0
// at most maxRetries attempts; nRetries is already incremented when it is printed
1383 while(nRetries++ < maxRetries) {
1384 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
1385 result = RetrieveFile(system, filePath.Data(), localFileName.Data());
1388 Log(detector, Form("GetFileName - Copy of file %s from %s FXS failed",
1389 filePath.Data(), GetSystemName(system)));
1392 AliInfo(Form("File %s copied from %s FXS into %s/%s",
1393 filePath.Data(), GetSystemName(system),
1394 GetShuttleTempDir(), localFileName.Data()));
1397 if (fileChecksum.Length()>0)
1399 // compare md5sum of local file with the one stored in the FXS DB
// The exit status of the pipeline (grep) tells whether the checksum text was found.
// NOTE(review): "2>&1 > /dev/null" sends stdout to /dev/null but leaves stderr on the
// original stdout; "> /dev/null 2>&1" would silence both - confirm the intent.
// NOTE(review): path and checksum go into the shell command unquoted.
1400 Int_t md5Comp = gSystem->Exec(Form("md5sum %s/%s |grep %s 2>&1 > /dev/null",
1401 GetShuttleTempDir(), localFileName.Data(), fileChecksum.Data()));
1405 Log(detector, Form("GetFileName - md5sum of file %s does not match with local copy!",
1411 Log(fCurrentDetector, Form("GetFile - md5sum of file %s not set in %s database, skipping comparison",
1412 filePath.Data(), GetSystemName(system)));
1417 if(!result) return 0;
// remember that this system's FXS was used and which (id, source) pair was fetched;
// the pair is stored as one string joined by the marker "#!?!#"
1419 fFXSCalled[system]=kTRUE;
1420 TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, sourceName.Data()));
1421 fFXSlist[system].Add(fileParams);
// static storage keeps the returned C string valid after this function returns
1423 static TString fullLocalFileName;
1424 fullLocalFileName = TString::Format("%s/%s", GetShuttleTempDir(), localFileName.Data());
1426 AliInfo(Form("fullLocalFileName = %s", fullLocalFileName.Data()));
1428 return fullLocalFileName.Data();
1432 //______________________________________________________________________________________________
1433 Bool_t AliShuttle::RetrieveFile(UInt_t system, const char* fxsFileName, const char* localFileName)
1435 // Copies file from FXS to local Shuttle machine
// system: index of the file exchange server (compared against kDAQ, kDCS, kHLT below)
// fxsFileName: path of the file on the FXS, relative to the per-system base folder
// localFileName: name given to the copy inside the Shuttle temp directory
// The copy is done with an external "scp" command; result is kTRUE when it exits with 0.
1437 // check temp directory: trying to cd to temp; if it does not exist, create it
// argument order fixed: the format names the file first and the system second
// (the system name used to be printed as the file name and vice versa)
1438 AliDebug(2, Form("Copy file %s from %s FXS into %s/%s",
1439 fxsFileName, GetSystemName(system), GetShuttleTempDir(), localFileName));
1441 void* dir = gSystem->OpenDirectory(GetShuttleTempDir());
1443 if (gSystem->mkdir(GetShuttleTempDir(), kTRUE)) {
1444 AliError(Form("Can't open directory <%s>", GetShuttleTempDir()));
1449 gSystem->FreeDirectory(dir);
// remote base folder prepended to fxsFileName, chosen per system
1452 TString baseFXSFolder;
1455 baseFXSFolder = "FES/";
1457 else if (system == kDCS)
1461 else if (system == kHLT)
1463 baseFXSFolder = "~/";
// NOTE(review): user, host and file names are inserted into the command line unquoted
1467 TString command = Form("scp -oPort=%d -2 %s@%s:%s%s %s/%s",
1468 fConfig->GetFXSPort(system),
1469 fConfig->GetFXSUser(system),
1470 fConfig->GetFXSHost(system),
1471 baseFXSFolder.Data(),
1473 GetShuttleTempDir(),
1476 AliDebug(2, Form("%s",command.Data()));
1478 Bool_t result = (gSystem->Exec(command.Data()) == 0);
1483 //______________________________________________________________________________________________
1484 TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
1486 // Get sources producing the condition file Id from file exchange servers
// system: file exchange server index; detector, id: values matched in the FXS table
// Builds a new TList holding one TObjString per row returned by the query.
// NOTE(review): detector and id are inserted unescaped into the SQL text.
1490 AliError("DCS system has only one source of data!");
1495 // check connection, in case connect
1496 if (!Connect(system))
// message prefix fixed: it named GetFile although it is emitted by GetFileSources
1498 Log(detector, Form("GetFileSources - Couldn't connect to %s FXS database", GetSystemName(system)));
// name of the table column that holds the source; default-constructed (empty)
// instead of the former "= 0" initialisation, it is only read through Data()
1502 TString sourceName;
1505 sourceName = "DAQsource";
1506 } else if (system == kHLT)
1508 sourceName = "DDLnumbers";
1511 TString sqlQueryStart = Form("select %s from %s where", sourceName.Data(), fConfig->GetFXSdbTable(system));
1512 TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1513 GetCurrentRun(), detector, id);
1514 TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1516 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1519 TSQLResult* aResult;
1520 aResult = fServer[system]->Query(sqlQuery);
1522 Log(detector, Form("GetFileSources - Can't execute SQL query to %s database for id: %s",
1523 GetSystemName(system), id));
1527 if (aResult->GetRowCount() == 0)
1530 Form("GetFileSources - No entry in %s FXS table for id: %s", GetSystemName(system), id));
1536 TList *list = new TList();
// one list element per result row, taken from the single selected column
1539 while ((aRow = aResult->Next()))
1542 TString source(aRow->GetField(0), aRow->GetFieldLength(0));
1543 AliDebug(2, Form("%s = %s", sourceName.Data(), source.Data()));
1544 list->Add(new TObjString(source));
1553 //______________________________________________________________________________________________
1554 Bool_t AliShuttle::Connect(Int_t system)
1556 // Connect to MySQL Server of the system's FXS MySQL databases
1557 // DAQ Logbook, Shuttle Logbook and DAQ FXS db are on the same host
// system: index into fServer. Values below 3 select an FXS database, any other value
// selects the logbook server settings.
// Returns kTRUE immediately when the slot already holds a live connection.
// NOTE(review): "system" is used as an array index without a range check.
1559 // check connection: if already connected return
1560 if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
1562 TString dbHost, dbUser, dbPass, dbName;
1564 if (system < 3) // FXS db servers
1566 dbHost = Form("mysql://%s:%d", fConfig->GetFXSdbHost(system), fConfig->GetFXSdbPort(system));
1567 dbUser = fConfig->GetFXSdbUser(system);
1568 dbPass = fConfig->GetFXSdbPass(system);
1569 dbName = fConfig->GetFXSdbName(system);
1570 } else { // Run & Shuttle logbook servers
1571 // TODO Will the Shuttle logbook server be the same as the Run logbook server ???
1572 dbHost = Form("mysql://%s:%d", fConfig->GetDAQlbHost(), fConfig->GetDAQlbPort());
1573 dbUser = fConfig->GetDAQlbUser();
1574 dbPass = fConfig->GetDAQlbPass();
1575 dbName = fConfig->GetDAQlbDB();
// open the connection; the database name is not part of the URL, it is used
// further down in the GetTables() calls
1578 fServer[system] = TSQLServer::Connect(dbHost.Data(), dbUser.Data(), dbPass.Data());
1579 if (!fServer[system] || !fServer[system]->IsConnected()) {
1582 AliError(Form("Can't establish connection to FXS database for %s",
1583 AliShuttleInterface::GetSystemName(system)));
1585 AliError("Can't establish connection to Run logbook.");
// a server object that exists but is not connected is destroyed
1587 if(fServer[system]) delete fServer[system];
1592 TSQLResult* aResult=0;
// list the tables of dbName on the server that belongs to the requested system
1595 aResult = fServer[kDAQ]->GetTables(dbName.Data());
1598 aResult = fServer[kDCS]->GetTables(dbName.Data());
1601 aResult = fServer[kHLT]->GetTables(dbName.Data());
1604 aResult = fServer[3]->GetTables(dbName.Data());
1612 //______________________________________________________________________________________________
1613 Bool_t AliShuttle::UpdateTable()
1615 // Update FXS table filling time_processed field in all rows corresponding to current run and detector
// Only systems whose fFXSCalled flag is set are touched. For each of them one SQL
// "update" is issued per (fileId, source) entry stored in fFXSlist[system].
1617 Bool_t result = kTRUE;
1619 for (UInt_t system=0; system<3; system++)
1621 if(!fFXSCalled[system]) continue;
1623 // check connection, in case connect
1624 if (!Connect(system))
1626 Log(fCurrentDetector, Form("UpdateTable - Couldn't connect to %s FXS database", GetSystemName(system)));
1631 TTimeStamp now; // now
1633 // Loop on FXS list entries
1634 TIter iter(&fFXSlist[system]);
1635 TObjString *aFXSentry=0;
1636 while ((aFXSentry = dynamic_cast<TObjString*> (iter.Next())))
// each entry is split back into its two parts (fileId, source)
// NOTE(review): TString::Tokenize treats "#!?!#" as a set of delimiter characters,
// so a '#', '!' or '?' inside an id or source also splits the string - confirm.
1638 TString aFXSentrystr = aFXSentry->String();
1639 TObjArray *aFXSarray = aFXSentrystr.Tokenize("#!?!#");
1640 if (!aFXSarray || aFXSarray->GetEntries() != 2 )
1642 Log(fCurrentDetector, Form("UpdateTable - error updating %s FXS entry. Check string: <%s>",
1643 GetSystemName(system), aFXSentrystr.Data()));
1644 if(aFXSarray) delete aFXSarray;
1648 const char* fileId = ((TObjString*) aFXSarray->At(0))->GetName();
1649 const char* source = ((TObjString*) aFXSarray->At(1))->GetName();
// the where clause selects the rows of the current run/detector/file; DAQ and HLT
// additionally match their source column
1651 TString whereClause;
1654 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
1655 GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
1657 else if (system == kDCS)
1659 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\";",
1660 GetCurrentRun(), fCurrentDetector.Data(), fileId);
1662 else if (system == kHLT)
1664 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\";",
1665 GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
// GetSec() returns time_t, which is wider than int on LP64 platforms; it is cast
// explicitly so that the argument really matches the %d conversion
1670 TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
1671 static_cast<Int_t>(now.GetSec()), whereClause.Data());
1673 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1676 TSQLResult* aResult;
1677 aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
1680 Log(fCurrentDetector, Form("UpdateTable - %s db: can't execute SQL query <%s>",
1681 GetSystemName(system), sqlQuery.Data()));
1692 //______________________________________________________________________________________________
1693 Bool_t AliShuttle::UpdateTableFailCase()
1695 // Update FXS table filling time_processed field in all rows corresponding to current run and detector
1696 // this is called in case the preprocessor is declared failed for the current run, because
1697 // the fields are updated only in case of success
// Unlike UpdateTable, all three FXS systems are updated and the rows are selected by
// run and detector only.
1699 Bool_t result = kTRUE;
1701 for (UInt_t system=0; system<3; system++)
1703 // check connection, in case connect
1704 if (!Connect(system))
1706 Log(fCurrentDetector, Form("UpdateTableFailCase - Couldn't connect to %s FXS database",
1707 GetSystemName(system)));
1712 TTimeStamp now; // now
1714 // All rows of the current run and detector are selected (no per-file loop here)
1716 TString whereClause = Form("where run=%d and detector=\"%s\";",
1717 GetCurrentRun(), fCurrentDetector.Data());
// GetSec() returns time_t, which is wider than int on LP64 platforms; it is cast
// explicitly so that the argument really matches the %d conversion
1720 TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
1721 static_cast<Int_t>(now.GetSec()), whereClause.Data());
1723 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1726 TSQLResult* aResult;
1727 aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
1730 Log(fCurrentDetector, Form("UpdateTableFailCase - %s db: can't execute SQL query <%s>",
1731 GetSystemName(system), sqlQuery.Data()));
1741 //______________________________________________________________________________________________
1742 Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
1745 // Update Shuttle logbook filling detector or shuttle_done column
1746 // ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
// detector: column to update, or the literal "shuttle_done" to flag the whole run
// status: value written into the detector column; not used in the "shuttle_done" case
// The row is selected with "where run=<GetCurrentRun()>".
1749 // check connection, in case connect
1751 Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook.");
1755 TString detName(detector);
1757 if(detName == "shuttle_done")
1759 setClause = "set shuttle_done=1";
1761 // Send the information to ML
1762 TMonaLisaText mlStatus("SHUTTLE_status", "Done");
1765 mlList.Add(&mlStatus);
1767 fMonaLisa->SendParameters(&mlList);
// detector case: the status is accepted when it contains "done" or "failed"
// (case-insensitive substring match, not an exact comparison)
1769 TString statusStr(status);
1770 if(statusStr.Contains("done", TString::kIgnoreCase) ||
1771 statusStr.Contains("failed", TString::kIgnoreCase)){
1772 setClause = Form("set %s=\"%s\"", detector, status);
1775 Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s",
1781 TString whereClause = Form("where run=%d", GetCurrentRun());
1783 TString sqlQuery = Form("update %s %s %s",
1784 fConfig->GetShuttlelbTable(), setClause.Data(), whereClause.Data());
1786 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1789 TSQLResult* aResult;
1790 aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
1792 Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data()));
1800 //______________________________________________________________________________________________
1801 Int_t AliShuttle::GetCurrentRun() const
1803 // Get current run from logbook entry
// Returns -1 when no logbook entry is set (fLogbookEntry is null).
1805 return fLogbookEntry ? fLogbookEntry->GetRun() : -1;
1808 //______________________________________________________________________________________________
1809 UInt_t AliShuttle::GetCurrentStartTime() const
1811 // get current start time
// Taken from the current logbook entry; returns 0 when no logbook entry is set.
1813 return fLogbookEntry ? fLogbookEntry->GetStartTime() : 0;
1816 //______________________________________________________________________________________________
1817 UInt_t AliShuttle::GetCurrentEndTime() const
1819 // get current end time from logbook entry
// Returns 0 when no logbook entry is set (fLogbookEntry is null).
1821 return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
1824 //______________________________________________________________________________________________
1825 void AliShuttle::Log(const char* detector, const char* message)
1827 // Fill log string with a message
// detector: tag written into the line and used as the base name of the log file
// message: free text appended to the line
// The line has the form "<timestamp> (<pid>): <detector> - [run <n> - ]<message>".
// It is printed through AliInfo and appended to a file in the Shuttle log directory.
// the log directory is probed (and created if needed) on every call
1829 void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
1831 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE)) {
1832 AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
1837 gSystem->FreeDirectory(dir);
1840 TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
// the run number is only added when a current run is known (GetCurrentRun() >= 0)
1841 if (GetCurrentRun() >= 0)
1842 toLog += Form("run %d - ", GetCurrentRun());
1843 toLog += Form("%s", message);
1845 AliInfo(toLog.Data());
// file name: "<detector>_<run>.log" when a run is known, "<detector>.log" otherwise
1848 if (GetCurrentRun() >= 0)
1849 fileName.Form("%s/%s_%d.log", GetShuttleLogDir(), detector, GetCurrentRun());
1851 fileName.Form("%s/%s.log", GetShuttleLogDir(), detector);
1853 gSystem->ExpandPathName(fileName);
// opened in append mode, so earlier lines are kept
1856 logFile.open(fileName, ofstream::out | ofstream::app);
1858 if (!logFile.is_open()) {
1859 AliError(Form("Could not open file %s", fileName.Data()));
1863 logFile << toLog.Data() << "\n";
1868 //______________________________________________________________________________________________
1869 Bool_t AliShuttle::Collect(Int_t run)
1872 // Collects conditions data for all UNPROCESSED run written to DAQ LogBook in case of run = -1 (default)
1873 // If a dedicated run is given this run is processed
1875 // In operational mode, this is the Shuttle function triggered by the EOR signal.
// Steps visible below: query the Shuttle logbook for rows with shuttle_done=0
// (optionally restricted to "run"), initialise fFirstUnprocessed, then hand the
// entries found to RetrieveConditionsData.
1879 Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs");
1881 Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run));
1883 SetLastAction("Starting");
1885 TString whereClause("where shuttle_done=0");
1887 whereClause += Form(" and run=%d", run);
1889 TObjArray shuttleLogbookEntries;
1890 if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries))
1892 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
1896 if (shuttleLogbookEntries.GetEntries() == 0)
1899 Log("SHUTTLE","Collect - Found no UNPROCESSED runs in Shuttle logbook");
1901 Log("SHUTTLE", Form("Collect - Run %d is already DONE "
1902 "or it does not exist in Shuttle logbook", run));
// start by assuming that every detector is seen unprocessed for the first time
1906 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1907 fFirstUnprocessed[iDet] = kTRUE;
1911 // query Shuttle logbook for earlier runs, check if some detectors are unprocessed,
1912 // flag them into fFirstUnprocessed array
// NOTE(review): this whereClause has the same name as the one declared further up;
// presumably it lives in a nested scope and shadows it - confirm.
1913 TString whereClause(Form("where shuttle_done=0 and run < %d", run));
1914 TObjArray tmpLogbookEntries;
1915 if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries))
1917 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
1921 TIter iter(&tmpLogbookEntries);
1922 AliShuttleLogbookEntry* anEntry = 0;
1923 while ((anEntry = dynamic_cast<AliShuttleLogbookEntry*> (iter.Next())))
// a detector still unprocessed in an earlier run is no longer "first unprocessed"
1925 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1927 if (anEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
1929 AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
1930 anEntry->GetRun(), GetDetName(iDet)));
1931 fFirstUnprocessed[iDet] = kFALSE;
1939 if (!RetrieveConditionsData(shuttleLogbookEntries))
1941 Log("SHUTTLE", "Collect - Process of at least one run failed");
1945 Log("SHUTTLE", "Collect - Requested run(s) successfully processed");
1949 //______________________________________________________________________________________________
1950 Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries)
1952 // Retrieve conditions data for all runs that aren't processed yet
// dateEntries: logbook entries; each element is passed to Process().
// Returns kTRUE only when hasError is still kFALSE after the loop.
1954 Bool_t hasError = kFALSE;
1956 TIter iter(&dateEntries);
1957 AliShuttleLogbookEntry* anEntry;
// NOTE(review): elements are cast with a C-style cast, without a type check
1959 while ((anEntry = (AliShuttleLogbookEntry*) iter.Next())){
1960 if (!Process(anEntry)){
1964 // clean SHUTTLE temp directory
// removes the "*.shuttle" files, i.e. the local copies named by GetFile
1965 TString filename = Form("%s/*.shuttle", GetShuttleTempDir());
1966 RemoveFile(filename.Data());
1969 return hasError == kFALSE;
1972 //______________________________________________________________________________________________
1973 ULong_t AliShuttle::GetTimeOfLastAction() const
// Reads fLastActionTime (set from time(0) in SetLastAction) while holding fMonitoringMutex.
1977 fMonitoringMutex->Lock();
// copy taken under the lock
1979 tmp = fLastActionTime;
1981 fMonitoringMutex->UnLock();
1986 //______________________________________________________________________________________________
1987 const TString AliShuttle::GetLastAction() const
1989 // returns a string description of the last action
// The result is returned by value (a TString, not a reference).
// fMonitoringMutex is held between the Lock() and UnLock() calls below.
1993 fMonitoringMutex->Lock();
1997 fMonitoringMutex->UnLock();
2002 //______________________________________________________________________________________________
2003 void AliShuttle::SetLastAction(const char* action)
2005 // updates the monitoring variables
// action: description stored in fLastAction; fLastActionTime is set to the current time.
// Both members are written while fMonitoringMutex is held.
2007 fMonitoringMutex->Lock();
2009 fLastAction = action;
2010 fLastActionTime = time(0);
2012 fMonitoringMutex->UnLock();
2015 //______________________________________________________________________________________________
2016 const char* AliShuttle::GetRunParameter(const char* param)
2018 // returns run parameter read from DAQ logbook
// param: parameter name, forwarded to the current logbook entry.
// An error is reported when no logbook entry is set.
2020 if(!fLogbookEntry) {
2021 AliError("No logbook entry!");
2025 return fLogbookEntry->GetRunParameter(param);
2028 //______________________________________________________________________________________________
2029 AliCDBEntry* AliShuttle::GetFromOCDB(const AliCDBPath& path)
2031 // returns object from OCDB valid for current run
// path: OCDB path of the requested object.
// The lookup goes to the storage identified by fgkMainCDB, for run GetCurrentRun().
2033 AliCDBStorage *sto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
2036 Log("SHUTTLE", "GetFromOCDB - Cannot activate main OCDB for query!");
// the dynamic_cast yields a null pointer when Get() returns nothing usable
2040 return dynamic_cast<AliCDBEntry*> (sto->Get(path, GetCurrentRun()));
2043 //______________________________________________________________________________________________
2044 Bool_t AliShuttle::SendMail()
2046 // sends a mail to the subdetector expert in case of preprocessor error
// The body is written to "<log dir>/mail.body" and the last 10 lines of the
// detector's log file for the current run are appended with "tail".
// The message is then sent with the external "mail" command.
// Recipients come from fConfig->GetResponsibles(fCurrentDetector).
2048 void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
2051 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
2053 AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
2058 gSystem->FreeDirectory(dir);
2061 TString bodyFileName;
2062 bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
2063 gSystem->ExpandPathName(bodyFileName);
2066 mailBody.open(bodyFileName, ofstream::out);
2068 if (!mailBody.is_open())
2070 AliError(Form("Could not open mail body file %s", bodyFileName.Data()));
// build the comma separated recipient list
2075 TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
2076 TObjString *anExpert=0;
2077 while ((anExpert = (TObjString*) iterExperts.Next()))
2079 to += Form("%s,", anExpert->GetName());
// drop the trailing comma; guarded so that an empty list does not call Remove(-1)
2081 if (to.Length() > 0) to.Remove(to.Length()-1);
2082 AliDebug(2, Form("to: %s",to.Data()));
2084 // TODO this will be removed...
2085 if (to.Contains("not_yet_set")) {
2086 AliInfo("List of detector responsibles not yet set!");
2090 TString cc="alberto.colla@cern.ch";
2092 TString subject = Form("%s Shuttle preprocessor error in run %d !",
2093 fCurrentDetector.Data(), GetCurrentRun());
2094 AliDebug(2, Form("subject: %s", subject.Data()));
2096 TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
2097 body += Form("SHUTTLE just detected that your preprocessor "
2098 "exited with ERROR state in run %d!!\n\n", GetCurrentRun());
2099 body += Form("Please check %s status on the web page asap!\n\n", fCurrentDetector.Data());
// fix: the %s conversion had no matching argument (undefined behaviour in Form);
// the detector name is supplied, matching the log file that is tailed below
2100 body += Form("The last 10 lines of %s log file are following:\n\n", fCurrentDetector.Data());
2102 AliDebug(2, Form("Body begin: %s", body.Data()));
2104 mailBody << body.Data();
// the stream is opened again in append mode before the tail output and closing text
2106 mailBody.open(bodyFileName, ofstream::out | ofstream::app);
2108 TString logFileName = Form("%s/%s_%d.log", GetShuttleLogDir(), fCurrentDetector.Data(), GetCurrentRun());
2109 TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
// a non-zero exit status of tail is reported inside the mail itself
2110 if (gSystem->Exec(tailCommand.Data()))
2112 mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
2115 TString endBody = Form("------------------------------------------------------\n\n");
2116 endBody += Form("In case of problems please contact the SHUTTLE core team.\n\n");
2117 endBody += "Please do not answer this message directly, it is automatically generated.\n\n";
2118 endBody += "Sincerely yours,\n\n \t\t\tthe SHUTTLE\n";
2120 AliDebug(2, Form("Body end: %s", endBody.Data()));
2122 mailBody << endBody.Data();
2127 TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
2131 bodyFileName.Data());
2132 AliDebug(2, Form("mail command: %s", mailCommand.Data()));
// NOTE(review): Exec returns the command's exit status, so "result" is kTRUE when
// the mail command FAILED; check how it is turned into the return value.
2134 Bool_t result = gSystem->Exec(mailCommand.Data());
2139 //______________________________________________________________________________________________
2140 void AliShuttle::SetRunType()
2142 // Gets run type from logbook and fills current Shuttle logbook entry
// Reads (detector, run_type) pairs for run_number = GetCurrentRun() from the table
// named by fConfig->GetRunTypelbTable(), through connection slot 3.
2144 // check connection, in case connect
2146 Log("SHUTTLE", "SetRunType - Couldn't connect to DAQ Logbook.");
2150 TString sqlQuery = Form("select detector,run_type from %s where run_number=%d",
2151 fConfig->GetRunTypelbTable(), GetCurrentRun());
2153 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2156 TSQLResult* aResult;
2157 aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
2159 Log("SHUTTLE", Form("SetRunType - Can't execute query <%s>", sqlQuery.Data()));
// one row per detector: column 0 is the detector, column 1 its run type
2164 while ((aRow = aResult->Next())) {
2165 TString det(aRow->GetField(0), aRow->GetFieldLength(0));
2166 TString runType(aRow->GetField(1), aRow->GetFieldLength(1));
// NOTE(review): fLogbookEntry is dereferenced here; no null check is visible - confirm
2168 fLogbookEntry->SetRunType(det, runType);
2178 //______________________________________________________________________________________________
2179 const char* AliShuttle::GetRunType(const char* detCode)
2181 // returns run type read from "run type" logbook
// detCode: detector identifier, forwarded to the current logbook entry.
// An error is reported when no logbook entry is set.
2183 if(!fLogbookEntry) {
2184 AliError("No logbook entry!");
2188 return fLogbookEntry->GetRunType(detCode);
2191 //______________________________________________________________________________________________
2192 void AliShuttle::SetShuttleTempDir(const char* tmpDir)
2194 // sets Shuttle temp directory
2196 fgkShuttleTempDir = gSystem->ExpandPathName(tmpDir);
2199 //______________________________________________________________________________________________
2200 void AliShuttle::SetShuttleLogDir(const char* logDir)
2202 // sets Shuttle log directory
2204 fgkShuttleLogDir = gSystem->ExpandPathName(logDir);