Check md5sum of retrieved FXS file inside "retry loop"; clean temp folder after
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttle.cxx
1 /**************************************************************************
2  * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
3  *                                                                        *
4  * Author: The ALICE Off-line Project.                                    *
5  * Contributors are mentioned in the code where appropriate.              *
6  *                                                                        *
7  * Permission to use, copy, modify and distribute this software and its   *
8  * documentation strictly for non-commercial purposes is hereby granted   *
9  * without fee, provided that the above copyright notice appears in all   *
10  * copies and that both the copyright notice and this permission notice   *
11  * appear in the supporting documentation. The authors make no claims     *
12  * about the suitability of this software for any purpose. It is          *
13  * provided "as is" without express or implied warranty.                  *
14  **************************************************************************/
15
16 /*
17 $Log$
18 Revision 1.27  2007/01/30 17:52:42  jgrosseo
19 adding monalisa monitoring
20
21 Revision 1.26  2007/01/23 19:20:03  acolla
22 Removed old ldif files, added TOF, MCH ldif files. Added some options in
23 AliShuttleConfig::Print. Added in Ali Shuttle: SetShuttleTempDir and
24 SetShuttleLogDir
25
26 Revision 1.25  2007/01/15 19:13:52  acolla
27 Moved some AliInfo to AliDebug in SendMail function
28
29 Revision 1.21  2006/12/07 08:51:26  jgrosseo
30 update (alberto):
31 table, db names in ldap configuration
32 added GRP preprocessor
33 DCS data can also be retrieved by data point
34
35 Revision 1.20  2006/11/16 16:16:48  jgrosseo
36 introducing strict run ordering flag
37 removed giving preprocessor name to preprocessor, they have to know their name themselves ;-)
38
39 Revision 1.19  2006/11/06 14:23:04  jgrosseo
40 major update (Alberto)
41 o) reading of run parameters from the logbook
42 o) online offline naming conversion
43 o) standalone DCSclient package
44
45 Revision 1.18  2006/10/20 15:22:59  jgrosseo
46 o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
47 o) Merging Collect, CollectAll, CollectNew function
48 o) Removing implementation of empty copy constructors (declaration still there!)
49
50 Revision 1.17  2006/10/05 16:20:55  jgrosseo
51 adapting to new CDB classes
52
53 Revision 1.16  2006/10/05 15:46:26  jgrosseo
54 applying to the new interface
55
56 Revision 1.15  2006/10/02 16:38:39  jgrosseo
57 update (alberto):
58 fixed memory leaks
59 storing of objects that failed to be stored to the grid before
60 interfacing of shuttle status table in daq system
61
62 Revision 1.14  2006/08/29 09:16:05  jgrosseo
63 small update
64
65 Revision 1.13  2006/08/15 10:50:00  jgrosseo
66 effc++ corrections (alberto)
67
68 Revision 1.12  2006/08/08 14:19:29  jgrosseo
69 Update to shuttle classes (Alberto)
70
71 - Possibility to set the full object's path in the Preprocessor's and
72 Shuttle's  Store functions
73 - Possibility to extend the object's run validity in the same classes
74 ("startValidity" and "validityInfinite" parameters)
75 - Implementation of the StoreReferenceData function to store reference
76 data in a dedicated CDB storage.
77
78 Revision 1.11  2006/07/21 07:37:20  jgrosseo
79 last run is stored after each run
80
81 Revision 1.10  2006/07/20 09:54:40  jgrosseo
82 introducing status management: The processing per subdetector is divided into several steps,
83 after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
84 can keep track of the number of failures and skips further processing after a certain threshold is
85 exceeded. These thresholds can be configured in LDAP.
86
87 Revision 1.9  2006/07/19 10:09:55  jgrosseo
88 new configuration, accesst to DAQ FES (Alberto)
89
90 Revision 1.8  2006/07/11 12:44:36  jgrosseo
91 adding parameters for extended validity range of data produced by preprocessor
92
93 Revision 1.7  2006/07/10 14:37:09  jgrosseo
94 small fix + todo comment
95
96 Revision 1.6  2006/07/10 13:01:41  jgrosseo
97 enhanced storing of last sucessfully processed run (alberto)
98
99 Revision 1.5  2006/07/04 14:59:57  jgrosseo
100 revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
101
102 Revision 1.4  2006/06/12 09:11:16  jgrosseo
103 coding conventions (Alberto)
104
105 Revision 1.3  2006/06/06 14:26:40  jgrosseo
106 o) removed files that were moved to STEER
107 o) shuttle updated to follow the new interface (Alberto)
108
109 Revision 1.2  2006/03/07 07:52:34  hristov
110 New version (B.Yordanov)
111
112 Revision 1.6  2005/11/19 17:19:14  byordano
113 RetrieveDATEEntries and RetrieveConditionsData added
114
115 Revision 1.5  2005/11/19 11:09:27  byordano
116 AliShuttle declaration added
117
118 Revision 1.4  2005/11/17 17:47:34  byordano
119 TList changed to TObjArray
120
121 Revision 1.3  2005/11/17 14:43:23  byordano
122 import to local CVS
123
124 Revision 1.1.1.1  2005/10/28 07:33:58  hristov
125 Initial import as subdirectory in AliRoot
126
127 Revision 1.2  2005/09/13 08:41:15  byordano
128 default startTime endTime added
129
130 Revision 1.4  2005/08/30 09:13:02  byordano
131 some docs added
132
133 Revision 1.3  2005/08/29 21:15:47  byordano
134 some docs added
135
136 */
137
138 //
139 // This class is the main manager for AliShuttle.
140 // It organizes the data retrieval from DCS and calls the
141 // interface methods of AliPreprocessor.
142 // For every detector in the configuration (see AliShuttleConfig),
143 // data for its set of aliases is retrieved. If there is a registered
144 // AliPreprocessor for this detector then it will be used
145 // according to the schema (see AliPreprocessor).
146 // If there isn't a registered AliPreprocessor then the retrieved
147 // data is stored automatically to the underlying AliCDBStorage.
148 // The alias name is used as detSpec.
149 //
150
151 #include "AliShuttle.h"
152
153 #include "AliCDBManager.h"
154 #include "AliCDBStorage.h"
155 #include "AliCDBId.h"
156 #include "AliCDBRunRange.h"
157 #include "AliCDBPath.h"
158 #include "AliCDBEntry.h"
159 #include "AliShuttleConfig.h"
160 #include "DCSClient/AliDCSClient.h"
161 #include "AliLog.h"
162 #include "AliPreprocessor.h"
163 #include "AliShuttleStatus.h"
164 #include "AliShuttleLogbookEntry.h"
165
166 #include <TSystem.h>
167 #include <TObject.h>
168 #include <TString.h>
169 #include <TTimeStamp.h>
170 #include <TObjString.h>
171 #include <TSQLServer.h>
172 #include <TSQLResult.h>
173 #include <TSQLRow.h>
174 #include <TMutex.h>
175
176 #include <TMonaLisaWriter.h>
177
178 #include <fstream>
179
180 #include <sys/types.h>
181 #include <sys/wait.h>
182
183 ClassImp(AliShuttle)
184
185 TString AliShuttle::fgkMainCDB("alien://folder=ShuttleCDB");
186 TString AliShuttle::fgkLocalCDB("local://LocalShuttleCDB");
187 TString AliShuttle::fgkMainRefStorage("alien://folder=ShuttleReference");
188 TString AliShuttle::fgkLocalRefStorage("local://LocalReferenceStorage");
189
190 Bool_t AliShuttle::fgkProcessDCS(kTRUE); 
191
192 TString AliShuttle::fgkShuttleTempDir = gSystem->ExpandPathName("$ALICE_ROOT/SHUTTLE/temp");
193 TString AliShuttle::fgkShuttleLogDir = gSystem->ExpandPathName("$ALICE_ROOT/SHUTTLE/log");
194
195 //______________________________________________________________________________________________
196 AliShuttle::AliShuttle(const AliShuttleConfig* config,
197                 UInt_t timeout, Int_t retries):
198 fConfig(config),
199 fTimeout(timeout), fRetries(retries),
200 fPreprocessorMap(),
201 fLogbookEntry(0),
202 fCurrentDetector(),
203 fStatusEntry(0),
204 fGridError(kFALSE),
205 fMonitoringMutex(0),
206 fLastActionTime(0),
207 fLastAction(),
208 fMonaLisa(0)
209 {
210         //
211         // config: AliShuttleConfig used
212         // timeout: timeout used for AliDCSClient connection
213         // retries: the number of retries in case of connection error.
214         //
215
216         if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
217         for(int iSys=0;iSys<4;iSys++) {
218                 fServer[iSys]=0;
219                 if (iSys < 3)
220                         fFXSlist[iSys].SetOwner(kTRUE);
221         }
222         fPreprocessorMap.SetOwner(kTRUE);
223
224         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
225                 fFirstUnprocessed[iDet] = kFALSE;
226
227         fMonitoringMutex = new TMutex();
228 }
229
230 //______________________________________________________________________________________________
231 AliShuttle::~AliShuttle()
232 {
233 // destructor
234
235         fPreprocessorMap.DeleteAll();
236         for(int iSys=0;iSys<4;iSys++)
237                 if(fServer[iSys]) {
238                         fServer[iSys]->Close();
239                         delete fServer[iSys];
240                         fServer[iSys] = 0;
241                 }
242
243         if (fStatusEntry){
244                 delete fStatusEntry;
245                 fStatusEntry = 0;
246         }
247         
248         if (fMonitoringMutex) 
249         {
250                 delete fMonitoringMutex;
251                 fMonitoringMutex = 0;
252         }
253 }
254
255 //______________________________________________________________________________________________
256 void AliShuttle::RegisterPreprocessor(AliPreprocessor* preprocessor)
257 {
258         //
259         // Registers new AliPreprocessor.
260         // It uses GetName() for indentificator of the pre processor.
261         // The pre processor is registered it there isn't any other
262         // with the same identificator (GetName()).
263         //
264
265         const char* detName = preprocessor->GetName();
266         if(GetDetPos(detName) < 0)
267                 AliFatal(Form("********** !!!!! Invalid detector name: %s !!!!! **********", detName));
268
269         if (fPreprocessorMap.GetValue(detName)) {
270                 AliWarning(Form("AliPreprocessor %s is already registered!", detName));
271                 return;
272         }
273
274         fPreprocessorMap.Add(new TObjString(detName), preprocessor);
275 }
276 //______________________________________________________________________________________________
277 UInt_t AliShuttle::Store(const AliCDBPath& path, TObject* object,
278                 AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite)
279 {
280   // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for
281   // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored
282   // using this function. Use StoreReferenceData instead!
283   // It calls WriteToCDB function which perform actual storage
284
285         return WriteToCDB(fgkMainCDB, fgkLocalCDB, path, object,
286                                 metaData, validityStart, validityInfinite);
287
288 }
289
290 //______________________________________________________________________________________________
291 UInt_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData)
292 {
293   // Stores a CDB object in the storage for reference data. This objects will not be available during
294   // offline reconstrunction. Use this function for reference data only!
295   // It calls WriteToCDB function which perform actual storage
296
297         return WriteToCDB(fgkMainRefStorage, fgkLocalRefStorage, path, object, metaData);
298
299 }
300
301 //______________________________________________________________________________________________
302 UInt_t AliShuttle::WriteToCDB(const char* mainUri, const char* localUri,
303                         const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
304                         Int_t validityStart, Bool_t validityInfinite)
305 {
306   // write object into the CDB. Parameters are passed by Store and StoreReferenceData functions.
307   // The parameters are:
308   //   1) Uri of the main storage (Grid)
309   //   2) Uri of the backup storage (Local)
310   //   3) the object's path.
311   //   4) the object to be stored
312   //   5) the metaData to be associated with the object
313   //   6) the validity start run number w.r.t. the current run,
314   //      if the data is valid only for this run leave the default 0
315   //   7) specifies if the calibration data is valid for infinity (this means until updated),
316   //      typical for calibration runs, the default is kFALSE
317   //
318   // returns 0 if fail
319   //         1 if stored in main (Grid) storage
320   //         2 if stored in backup (Local) storage
321
322         const char* cdbType = (mainUri == fgkMainCDB) ? "CDB" : "Reference";
323
324         Int_t firstRun = GetCurrentRun() - validityStart;
325         if(firstRun < 0) {
326                 AliError("First valid run happens to be less than 0! Setting it to 0.");
327                 firstRun=0;
328         }
329
330         Int_t lastRun = -1;
331         if(validityInfinite) {
332                 lastRun = AliCDBRunRange::Infinity();
333         } else {
334                 lastRun = GetCurrentRun();
335         }
336
337         AliCDBId id(path, firstRun, lastRun, -1, -1);
338
339         if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){
340                 TObjString runUsed = Form("%d", GetCurrentRun());
341                 metaData->SetProperty("RunUsed(TObjString)", runUsed.Clone());
342         }
343
344         UInt_t result = 0;
345
346         if (!(AliCDBManager::Instance()->GetStorage(mainUri))) {
347                 AliError(Form("WriteToCDB - Cannot activate main %s storage", cdbType));
348         } else {
349                 result = (UInt_t) AliCDBManager::Instance()->GetStorage(mainUri)
350                                         ->Put(object, id, metaData);
351         }
352
353         if(!result) {
354
355                 Log(fCurrentDetector,
356                         Form("WriteToCDB - Problem with main %s storage. Putting <%s> into backup storage",
357                                 cdbType, path.GetPath().Data()));
358
359                 // Set Grid version to current run number, to ease retrieval later
360                 id.SetVersion(GetCurrentRun());
361
362                 result = AliCDBManager::Instance()->GetStorage(localUri)
363                                         ->Put(object, id, metaData);
364
365                 if(result) {
366                         result = 2;
367                         fGridError = kTRUE;
368                 }else{
369                         Log(fCurrentDetector, "WriteToCDB - Can't store data!");
370                 }
371         }
372
373         return result;
374
375 }
376
377 //______________________________________________________________________________________________
378 AliShuttleStatus* AliShuttle::ReadShuttleStatus()
379 {
380 // Reads the AliShuttleStatus from the CDB
381
382         if (fStatusEntry){
383                 delete fStatusEntry;
384                 fStatusEntry = 0;
385         }
386
387         fStatusEntry = AliCDBManager::Instance()->GetStorage(AliShuttle::GetLocalCDB())
388                 ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
389
390         if (!fStatusEntry) return 0;
391         fStatusEntry->SetOwner(1);
392
393         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
394         if (!status) {
395                 AliError("Invalid object stored to CDB!");
396                 return 0;
397         }
398
399         return status;
400 }
401
402 //______________________________________________________________________________________________
403 Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status)
404 {
405 // writes the status for one subdetector
406
407         if (fStatusEntry){
408                 delete fStatusEntry;
409                 fStatusEntry = 0;
410         }
411
412         Int_t run = GetCurrentRun();
413
414         AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
415
416         fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
417         fStatusEntry->SetOwner(1);
418
419         UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
420
421         if (!result) {
422                 AliError(Form("WriteShuttleStatus for %s, run %d failed", fCurrentDetector.Data(), run));
423                 return kFALSE;
424         }
425         
426         SendMLInfo();
427
428         return kTRUE;
429 }
430
431 //______________________________________________________________________________________________
432 void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount)
433 {
434   // changes the AliShuttleStatus for the given detector and run to the given status
435
436         if (!fStatusEntry){
437                 AliError("UNEXPECTED: fStatusEntry empty");
438                 return;
439         }
440
441         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
442
443         if (!status){
444                 AliError("UNEXPECTED: status could not be read from current CDB entry");
445                 return;
446         }
447
448         TString actionStr = Form("UpdateShuttleStatus - %s: Changing state from %s to %s",
449                                 fCurrentDetector.Data(),
450                                 status->GetStatusName(),
451                                 status->GetStatusName(newStatus));
452         Log("SHUTTLE", actionStr);
453         SetLastAction(actionStr);
454
455         status->SetStatus(newStatus);
456         if (increaseCount) status->IncreaseCount();
457
458         AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
459
460         SendMLInfo();
461 }
462
463 //______________________________________________________________________________________________
464 void AliShuttle::SendMLInfo()
465 {
466         //
467         // sends ML information about the current status of the current detector being processed
468         //
469         
470         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
471         
472         if (!status){
473                 AliError("UNEXPECTED: status could not be read from current CDB entry");
474                 return;
475         }
476         
477         TMonaLisaText  mlStatus(Form("%s_status", fCurrentDetector.Data()), status->GetStatusName());
478         TMonaLisaValue mlRetryCount(Form("%s_count", fCurrentDetector.Data()), status->GetCount());
479
480         TList mlList;
481         mlList.Add(&mlStatus);
482         mlList.Add(&mlRetryCount);
483
484         fMonaLisa->SendParameters(&mlList);
485 }
486
487 //______________________________________________________________________________________________
488 Bool_t AliShuttle::ContinueProcessing()
489 {
490 // this function reads the AliShuttleStatus information from CDB and
491 // checks if the processing should be continued
492 // if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
493
494         if (!fConfig->HostProcessDetector(fCurrentDetector)) return kFALSE;
495
496         AliPreprocessor* aPreprocessor =
497                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
498         if (!aPreprocessor)
499         {
500                 AliInfo(Form("%s: no preprocessor registered", fCurrentDetector.Data()));
501                 return kFALSE;
502         }
503
504         AliShuttleLogbookEntry::Status entryStatus =
505                 fLogbookEntry->GetDetectorStatus(fCurrentDetector);
506
507         if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
508                 AliInfo(Form("ContinueProcessing - %s is %s",
509                                 fCurrentDetector.Data(),
510                                 fLogbookEntry->GetDetectorStatusName(entryStatus)));
511                 return kFALSE;
512         }
513
514         // if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state
515
516         // check if current run is first unprocessed run for current detector
517         if (fConfig->StrictRunOrder(fCurrentDetector) &&
518                 !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
519         {
520                 Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering but this is not the first unprocessed run!"));
521                 return kFALSE;
522         }
523
524         AliShuttleStatus* status = ReadShuttleStatus();
525         if (!status) {
526                 // first time
527                 Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
528                                 fCurrentDetector.Data()));
529                 status = new AliShuttleStatus(AliShuttleStatus::kStarted);
530                 return WriteShuttleStatus(status);
531         }
532
533         // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
534         // If it happens it may mean Logbook updating failed... let's do it now!
535         if (status->GetStatus() == AliShuttleStatus::kDone ||
536             status->GetStatus() == AliShuttleStatus::kFailed){
537                 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
538                                         fCurrentDetector.Data(),
539                                         status->GetStatusName(status->GetStatus())));
540                 UpdateShuttleLogbook(fCurrentDetector.Data(),
541                                         status->GetStatusName(status->GetStatus()));
542                 return kFALSE;
543         }
544
545         if (status->GetStatus() == AliShuttleStatus::kStoreFailed) {
546                 Log("SHUTTLE",
547                         Form("ContinueProcessing - %s: Grid storage of one or more objects failed. Trying again now",
548                                 fCurrentDetector.Data()));
549                 if(TryToStoreAgain()){
550                         Log(fCurrentDetector.Data(), "ContinueProcessing - All objects successfully stored into OCDB");
551                         UpdateShuttleStatus(AliShuttleStatus::kDone);
552                         UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE");
553                 } else {
554                         Log("SHUTTLE",
555                                 Form("ContinueProcessing - %s: Grid storage failed again",
556                                         fCurrentDetector.Data()));
557                         // trigger ML information manually because we do not had a status change
558                         SendMLInfo();
559                 }
560                 return kFALSE;
561         }
562
563         // if we get here, there is a restart
564         Bool_t cont = kFALSE;
565
566         // abort conditions
567         if (status->GetCount() >= fConfig->GetMaxRetries()) {
568                 Log("SHUTTLE", Form("ContinueProcessing - %s failed %d times in status %s - "
569                                 "Updating Shuttle Logbook", fCurrentDetector.Data(),
570                                 status->GetCount(), status->GetStatusName()));
571                 UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
572                 UpdateShuttleStatus(AliShuttleStatus::kFailed);
573         } else {
574                 Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. "
575                                 "Aborted before with %s. Retry number %d.", fCurrentDetector.Data(),
576                                 status->GetStatusName(), status->GetCount()));
577                 UpdateShuttleStatus(AliShuttleStatus::kStarted, kTRUE);
578                 cont = kTRUE;
579         }
580
581         // Send mail to detector expert!
582         AliInfo(Form("Sending mail to %s expert...", fCurrentDetector.Data()));
583         if (!SendMail())
584                 Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert",
585                                 fCurrentDetector.Data()));
586
587         return cont;
588 }
589
590 //______________________________________________________________________________________________
591 Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
592 {
593         //
594         // Makes data retrieval for all detectors in the configuration.
595         // entry: Shuttle logbook entry, contains run paramenters and status of detectors
596         // (Unprocessed, Inactive, Failed or Done).
597         // Returns kFALSE in case of error occured and kTRUE otherwise
598         //
599
600         if(!entry) return kFALSE;
601
602         fLogbookEntry = entry;
603
604         if (fLogbookEntry->IsDone())
605         {
606                 Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook");
607                 UpdateShuttleLogbook("shuttle_done");
608                 fLogbookEntry = 0;
609                 return kTRUE;
610         }
611
612         // create ML instance that monitors this run
613         fMonaLisa = new TMonaLisaWriter(Form("%d", GetCurrentRun()), "SHUTTLE", "aliendb1.cern.ch");
614         // disable monitoring of other parameters that come e.g. from TFile
615         gMonitoringWriter = 0;
616
617         AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^* \n",
618                                         GetCurrentRun()));
619
620
621         // Send the information to ML
622         TMonaLisaText  mlStatus("SHUTTLE_status", "Processing");
623
624         TList mlList;
625         mlList.Add(&mlStatus);
626
627         fMonaLisa->SendParameters(&mlList);
628                         
629         fLogbookEntry->Print("all");
630
631         // Initialization
632         Bool_t hasError = kFALSE;
633
634         AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
635         if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
636         AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
637         if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
638
639         // Loop on detectors in the configuration
640         TIter iter(fConfig->GetDetectors());
641         TObjString* aDetector = 0;
642
643         while ((aDetector = (TObjString*) iter.Next()))
644         {
645                 fCurrentDetector = aDetector->String();
646
647                 if (ContinueProcessing() == kFALSE) continue;
648
649                 AliInfo(Form("\n\n \t\t\t****** run %d - %s: START  ******",
650                                                 GetCurrentRun(), aDetector->GetName()));
651
652                 for(Int_t iSys=0;iSys<3;iSys++) fFXSCalled[iSys]=kFALSE;
653
654                 Log(fCurrentDetector.Data(), "Starting processing");
655
656                 Int_t pid = fork();
657
658                 if (pid < 0)
659                 {
660                         Log("SHUTTLE", "ERROR: Forking failed");
661                 }
662                 else if (pid > 0)
663                 {
664                         // parent
665                         AliInfo(Form("In parent process of %d - %s: Starting monitoring",
666                                                         GetCurrentRun(), aDetector->GetName()));
667
668                         Long_t begin = time(0);
669
670                         int status; // to be used with waitpid, on purpose an int (not Int_t)!
671                         while (waitpid(pid, &status, WNOHANG) == 0)
672                         {
673                                 Long_t expiredTime = time(0) - begin;
674
675                                 if (expiredTime > fConfig->GetPPTimeOut())
676                                 {
677                                         Log("SHUTTLE", Form("Process time out. Run time: %d seconds. Killing...",
678                                                                 expiredTime));
679
680                                         kill(pid, 9);
681
682                                         hasError = kTRUE;
683
684                                         gSystem->Sleep(1000);
685                                 }
686                                 else
687                                 {
688                                         if (expiredTime % 60 == 0)
689                                         Log("SHUTTLE", Form("Checked process. Run time: %d seconds.",
690                                                                 expiredTime));
691                                         gSystem->Sleep(1000);
692                                 }
693                         }
694
695                         AliInfo(Form("In parent process of %d - %s: Client has terminated.",
696                                                                 GetCurrentRun(), aDetector->GetName()));
697
698                         if (WIFEXITED(status))
699                         {
700                                 Int_t returnCode = WEXITSTATUS(status);
701
702                                 Log("SHUTTLE", Form("The return code is %d", returnCode));
703
704                                 if (returnCode != 0)
705                                 hasError = kTRUE;
706                         }
707                 }
708                 else if (pid == 0)
709                 {
710                         // client
711                         AliInfo(Form("In client process of %d - %s", GetCurrentRun(), aDetector->GetName()));
712
713                         UInt_t result = ProcessCurrentDetector();
714
715                         Int_t returnCode = 0; // will be set to 1 in case of an error
716
717                         if (!result)
718                         {
719                                 returnCode = 1;
720                                 AliInfo(Form("\n \t\t\t****** run %d - %s: PREPROCESSOR ERROR ****** \n\n",
721                                                         GetCurrentRun(), aDetector->GetName()));
722                         }
723                         else if (result == 2)
724                         {
725                                 AliInfo(Form("\n \t\t\t****** run %d - %s: STORAGE ERROR ****** \n\n",
726                                                         GetCurrentRun(), aDetector->GetName()));
727                         } else
728                         {
729                                 AliInfo(Form("\n \t\t\t****** run %d - %s: DONE ****** \n\n",
730                                                         GetCurrentRun(), aDetector->GetName()));
731                         }
732
733                         if (result > 0)
734                         {
735                                 // Process successful: Update time_processed field in FXS logbooks!
736                                 if (UpdateTable() == kFALSE) returnCode = 1;
737                         }
738
739                         for (UInt_t iSys=0; iSys<3; iSys++)
740                         {
741                                 if (fFXSCalled[iSys]) fFXSlist[iSys].Clear();
742                         }
743
744                         AliInfo(Form("Client process of %d - %s is exiting now with %d.",
745                                                         GetCurrentRun(), aDetector->GetName(), returnCode));
746
747                         // the client exits here
748                         gSystem->Exit(returnCode);
749
750                         AliError("We should never get here!!!");
751                 }
752         }
753
754         AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^* \n",
755                                                         GetCurrentRun()));
756
757         //check if shuttle is done for this run, if so update logbook
758         TObjArray checkEntryArray;
759         checkEntryArray.SetOwner(1);
760         TString whereClause = Form("where run=%d", GetCurrentRun());
761         if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || checkEntryArray.GetEntries() == 0) {
762                 Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!",
763                                                 GetCurrentRun()));
764                 return hasError == kFALSE;
765         }
766
767         AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*>
768                                                 (checkEntryArray.At(0));
769
770         if (checkEntry)
771         {
772                 if (checkEntry->IsDone())
773                 {
774                         Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook");
775                         UpdateShuttleLogbook("shuttle_done");
776                 }
777                 else
778                 {
779                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
780                         {
781                                 if (checkEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
782                                 {
783                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
784                                                         checkEntry->GetRun(), GetDetName(iDet)));
785                                         fFirstUnprocessed[iDet] = kFALSE;
786                                 }
787                         }
788                 }
789         }
790
791         // remove ML instance
792         delete fMonaLisa;
793         fMonaLisa = 0;
794
795         fLogbookEntry = 0;
796
797         return hasError == kFALSE;
798 }
799
800 //______________________________________________________________________________________________
801 UInt_t AliShuttle::ProcessCurrentDetector()
802 {
803         //
804         // Makes data retrieval just for a specific detector (fCurrentDetector).
805         // Threre should be a configuration for this detector.
806
807         AliInfo(Form("Retrieving values for %s, run %d", fCurrentDetector.Data(), GetCurrentRun()));
808
809         UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
810
811         TMap dcsMap;
812         dcsMap.SetOwner(1);
813
814         Bool_t aDCSError = kFALSE;
815         fGridError = kFALSE;
816
817         // TODO Test only... I've added a flag that allows to
818         // exclude DCS archive DB query
819         if (!fgkProcessDCS)
820         {
821                 AliInfo("Skipping DCS processing!");
822                 aDCSError = kFALSE;
823         } else {
824                 TString host(fConfig->GetDCSHost(fCurrentDetector));
825                 Int_t port = fConfig->GetDCSPort(fCurrentDetector);
826
827                 // Retrieval of Aliases
828                 TObjString* anAlias = 0;
829                 Int_t iAlias = 1;
830                 Int_t nTotAliases= ((TMap*)fConfig->GetDCSAliases(fCurrentDetector))->GetEntries();
831                 TIter iterAliases(fConfig->GetDCSAliases(fCurrentDetector));
832                 while ((anAlias = (TObjString*) iterAliases.Next()))
833                 {
834                         TObjArray *valueSet = new TObjArray();
835                         valueSet->SetOwner(1);
836
837                         if (((iAlias-1) % 500) == 0 || iAlias == nTotAliases)
838                                 AliInfo(Form("Querying DCS archive: alias %s (%d of %d)",
839                                                 anAlias->GetName(), iAlias++, nTotAliases));
840                         aDCSError = (GetValueSet(host, port, anAlias->String(), valueSet, kAlias) == 0);
841
842                         if(!aDCSError)
843                         {
844                                 dcsMap.Add(anAlias->Clone(), valueSet);
845                         } else {
846                                 Log(fCurrentDetector,
847                                         Form("ProcessCurrentDetector - Error while retrieving alias %s",
848                                                 anAlias->GetName()));
849                                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
850                                 dcsMap.DeleteAll();
851                                 return 0;
852                         }
853                 }
854
855                 // Retrieval of Data Points
856                 TObjString* aDP = 0;
857                 Int_t iDP = 0;
858                 Int_t nTotDPs= ((TMap*)fConfig->GetDCSDataPoints(fCurrentDetector))->GetEntries();
859                 TIter iterDP(fConfig->GetDCSDataPoints(fCurrentDetector));
860                 while ((aDP = (TObjString*) iterDP.Next()))
861                 {
862                         TObjArray *valueSet = new TObjArray();
863                         valueSet->SetOwner(1);
864                         if (((iDP-1) % 500) == 0 || iDP == nTotDPs)
865                                 AliInfo(Form("Querying DCS archive: DP %s (%d of %d)",
866                                                 aDP->GetName(), iDP++, nTotDPs));
867                         aDCSError = (GetValueSet(host, port, aDP->String(), valueSet, kDP) == 0);
868
869                         if(!aDCSError)
870                         {
871                                 dcsMap.Add(aDP->Clone(), valueSet);
872                         } else {
873                                 Log(fCurrentDetector,
874                                         Form("ProcessCurrentDetector - Error while retrieving data point %s",
875                                                 aDP->GetName()));
876                                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
877                                 dcsMap.DeleteAll();
878                                 return 0;
879                         }
880                 }
881         }
882
883         // DCS Archive DB processing successful. Call Preprocessor!
884         UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
885
886         AliPreprocessor* aPreprocessor =
887                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
888
889         aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
890         UInt_t aPPResult = aPreprocessor->Process(&dcsMap);
891
892         UInt_t returnValue = 0;
893         if (aPPResult == 0) { // Preprocessor error
894                 UpdateShuttleStatus(AliShuttleStatus::kPPError);
895                 returnValue = 0;
896         } else if (fGridError == kFALSE) { // process and Grid storage ok!
897                 UpdateShuttleStatus(AliShuttleStatus::kDone);
898                 UpdateShuttleLogbook(fCurrentDetector, "DONE");
899                 Log(fCurrentDetector.Data(),
900                         "ProcessCurrentDetector - Preprocessor and Grid storage ended successfully");
901                 returnValue = 1;
902         } else { // Grid storage error (process ok, but object put in local storage)
903                 UpdateShuttleStatus(AliShuttleStatus::kStoreFailed);
904                 returnValue = 2;
905         }
906
907         dcsMap.DeleteAll();
908
909         return returnValue;
910 }
911
912 //______________________________________________________________________________________________
913 Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
914                 TObjArray& entries)
915 {
916 // Query DAQ's Shuttle logbook and fills detector status object.
917 // Call QueryRunParameters to query DAQ logbook for run parameters.
918
919         entries.SetOwner(1);
920
921         // check connection, in case connect
922         if(!Connect(3)) return kFALSE;
923
924         TString sqlQuery;
925         sqlQuery = Form("select * from logbook_shuttle %s order by run", whereClause);
926
927         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
928         if (!aResult) {
929                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
930                 return kFALSE;
931         }
932
933         AliDebug(2,Form("Query = %s", sqlQuery.Data()));
934
935         if(aResult->GetRowCount() == 0) {
936 //              if(sqlQuery.EndsWith("where shuttle_done=0 order by run")){
937 //                      Log("SHUTTLE", "QueryShuttleLogbook - All runs in Shuttle Logbook are already DONE");
938 //                      delete aResult;
939 //                      return kTRUE;
940 //              } else {
941                         AliInfo("No entries in Shuttle Logbook match request");
942                         delete aResult;
943                         return kTRUE;
944 //              }
945         }
946
947         // TODO Check field count!
948         const UInt_t nCols = 22;
949         if (aResult->GetFieldCount() != (Int_t) nCols) {
950                 AliError("Invalid SQL result field number!");
951                 delete aResult;
952                 return kFALSE;
953         }
954
955         TSQLRow* aRow;
956         while ((aRow = aResult->Next())) {
957                 TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
958                 Int_t run = runString.Atoi();
959
960                 AliShuttleLogbookEntry *entry = QueryRunParameters(run);
961                 if (!entry)
962                         continue;
963
964                 // loop on detectors
965                 for(UInt_t ii = 0; ii < nCols; ii++)
966                         entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii));
967
968                 entries.AddLast(entry);
969                 delete aRow;
970         }
971
972 //      if(sqlQuery.EndsWith("where shuttle_done=0 order by run"))
973 //              Log("SHUTTLE", Form("QueryShuttleLogbook - Found %d unprocessed runs in Shuttle Logbook",
974 //                                                      entries.GetEntriesFast()));
975         delete aResult;
976         return kTRUE;
977 }
978
979 //______________________________________________________________________________________________
980 AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
981 {
982         //
983         // Retrieve run parameters written in the DAQ logbook and sets them into AliShuttleLogbookEntry object
984         //
985
986         // check connection, in case connect
987         if (!Connect(3))
988                 return 0;
989
990         TString sqlQuery;
991         sqlQuery.Form("select * from %s where run=%d", fConfig->GetDAQlbTable(), run);
992
993         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
994         if (!aResult) {
995                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
996                 return 0;
997         }
998
999         if (aResult->GetRowCount() == 0) {
1000                 Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run));
1001                 delete aResult;
1002                 return 0;
1003         }
1004
1005         if (aResult->GetRowCount() > 1) {
1006                 AliError(Form("More than one entry in DAQ Logbook for run %d. Skipping", run));
1007                 delete aResult;
1008                 return 0;
1009         }
1010
1011         TSQLRow* aRow = aResult->Next();
1012         if (!aRow)
1013         {
1014                 AliError(Form("Could not retrieve row for run %d. Skipping", run));
1015                 delete aResult;
1016                 return 0;
1017         }
1018
1019         AliShuttleLogbookEntry* entry = new AliShuttleLogbookEntry(run);
1020
1021         for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
1022                 entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
1023
1024         UInt_t startTime = entry->GetStartTime();
1025         UInt_t endTime = entry->GetEndTime();
1026
1027         if (!startTime || !endTime || startTime > endTime) {
1028                 Log("SHUTTLE",
1029                         Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d",
1030                                 run, startTime, endTime));
1031                 delete entry;
1032                 delete aRow;
1033                 delete aResult;
1034                 return 0;
1035         }
1036
1037         delete aRow;
1038         delete aResult;
1039
1040         return entry;
1041 }
1042
1043 //______________________________________________________________________________________________
1044 Bool_t AliShuttle::TryToStoreAgain()
1045 {
1046   // Called in case the detector failed to store the object in Grid OCDB
1047   // It tries to store the object again, if it does not find more recent and overlapping objects
1048   // Calls underlying TryToStoreAgain(const char*) function twice, for OCDB and Reference storage.
1049
1050         AliInfo("Trying to store OCDB data again...");
1051         Bool_t resultCDB = TryToStoreAgain(fgkMainCDB);
1052
1053         AliInfo("Trying to store reference data again...");
1054         Bool_t resultRef = TryToStoreAgain(fgkMainRefStorage);
1055
1056         return resultCDB && resultRef;
1057 }
1058
1059 //______________________________________________________________________________________________
1060 Bool_t AliShuttle::TryToStoreAgain(TString& gridURI)
1061 {
1062   // Called by TryToStoreAgain(), performs actual storage retry
1063
1064         TObjArray* gridIds=0;
1065
1066         Bool_t result = kTRUE;
1067
1068         const char* type = 0;
1069         TString backupURI;
1070         if(gridURI == fgkMainCDB) {
1071                 type = "OCDB";
1072                 backupURI = fgkLocalCDB;
1073         } else if(gridURI == fgkMainRefStorage) {
1074                 type = "reference";
1075                 backupURI = fgkLocalRefStorage;
1076         } else {
1077                 AliError(Form("Invalid storage URI: %s", gridURI.Data()));
1078                 return kFALSE;
1079         }
1080
1081         AliCDBManager* man = AliCDBManager::Instance();
1082
1083         AliCDBStorage *gridSto = man->GetStorage(gridURI);
1084         if(!gridSto) {
1085                 Log(fCurrentDetector.Data(),
1086                         Form("TryToStoreAgain - cannot activate main %s storage", type));
1087                 return kFALSE;
1088         }
1089
1090         gridIds = gridSto->GetQueryCDBList();
1091
1092         // get objects previously stored in local CDB
1093         AliCDBStorage *backupSto = man->GetStorage(backupURI);
1094         AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*");
1095         // Local objects were stored with current run as Grid version!
1096         TList* localEntries = backupSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun());
1097         localEntries->SetOwner(1);
1098
1099         // loop on local stored objects
1100         TIter localIter(localEntries);
1101         AliCDBEntry *aLocEntry = 0;
1102         while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){
1103                 aLocEntry->SetOwner(1);
1104                 AliCDBId aLocId = aLocEntry->GetId();
1105                 aLocEntry->SetVersion(-1);
1106                 aLocEntry->SetSubVersion(-1);
1107
1108                 // loop on Grid valid Id's
1109                 Bool_t store = kTRUE;
1110                 TIter gridIter(gridIds);
1111                 AliCDBId* aGridId = 0;
1112                 while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
1113                         // If local object is valid up to infinity we store it only if it is
1114                         // the first unprocessed run!
1115                         if (aLocId.GetLastRun() == AliCDBRunRange::Infinity())
1116                         {
1117                                 if (!fFirstUnprocessed[GetDetPos(fCurrentDetector)])
1118                                 {
1119                                         Log(fCurrentDetector.Data(),
1120                                                 ("TryToStoreAgain - This object has validity infinite but "
1121                                                  "there are previous unprocessed runs!"));
1122                                         continue;
1123                                 } else {
1124                                         break;
1125                                 }
1126                         }
1127                         if(aGridId->GetPath() != aLocId.GetPath()) continue;
1128                         // skip all objects valid up to infinity
1129                         if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
1130                         // if we get here, it means there's already some more recent object stored on Grid!
1131                         store = kFALSE;
1132                         break;
1133                 }
1134
1135                 if(!store){
1136                         Log(fCurrentDetector.Data(),
1137                                 Form("TryToStoreAgain - A more recent object already exists in %s storage: <%s>",
1138                                         type, aGridId->ToString().Data()));
1139                         // removing local filename...
1140                         // TODO maybe it's better not to remove it, it was not copied to the Grid!
1141                         TString filename;
1142                         backupSto->IdToFilename(aLocId, filename);
1143                         AliInfo(Form("Removing local file %s", filename.Data()));
1144                         gSystem->Exec(Form("rm %s",filename.Data()));
1145                         continue;
1146                 }
1147
1148                 // If we get here, the file can be stored!
1149                 Bool_t storeOk = gridSto->Put(aLocEntry);
1150                 if(storeOk){
1151                         Log(fCurrentDetector.Data(),
1152                                 Form("TryToStoreAgain - Object <%s> successfully put into %s storage",
1153                                         aLocId.ToString().Data(), type));
1154
1155                         // removing local filename...
1156                         TString filename;
1157                         backupSto->IdToFilename(aLocId, filename);
1158                         AliInfo(Form("Removing local file %s", filename.Data()));
1159                         gSystem->Exec(Form("rm %s", filename.Data()));
1160                         continue;
1161                 } else  {
1162                         Log(fCurrentDetector.Data(),
1163                                 Form("TryToStoreAgain - Grid %s storage of object <%s> failed again",
1164                                         type, aLocId.ToString().Data()));
1165                         result = kFALSE;
1166                 }
1167         }
1168         localEntries->Clear();
1169
1170         return result;
1171 }
1172
1173 //______________________________________________________________________________________________
1174 Bool_t AliShuttle::GetValueSet(const char* host, Int_t port, const char* entry,
1175                                 TObjArray* valueSet, DCSType type)
1176 {
1177 // Retrieve all "entry" data points from the DCS server
1178 // host, port: TSocket connection parameters
1179 // entry: name of the alias or data point
1180 // valueSet: array of retrieved AliDCSValue's
1181 // type: kAlias or kDP
1182
1183         AliDCSClient client(host, port, fTimeout, fRetries);
1184         if (!client.IsConnected())
1185         {
1186                 return kFALSE;
1187         }
1188
1189         Int_t result=0;
1190
1191         if (type == kAlias)
1192         {
1193                 result = client.GetAliasValues(entry,
1194                         GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1195         } else
1196         if (type == kDP)
1197         {
1198                 result = client.GetDPValues(entry,
1199                         GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1200         }
1201
1202         if (result < 0)
1203         {
1204                 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get '%s'! Reason: %s",
1205                         entry, AliDCSClient::GetErrorString(result)));
1206
1207                 if (result == AliDCSClient::fgkServerError)
1208                 {
1209                         Log(fCurrentDetector.Data(), Form("GetValueSet - Server error: %s",
1210                                 client.GetServerError().Data()));
1211                 }
1212
1213                 return kFALSE;
1214         }
1215
1216         return kTRUE;
1217 }
1218
1219 //______________________________________________________________________________________________
1220 const char* AliShuttle::GetFile(Int_t system, const char* detector,
1221                 const char* id, const char* source)
1222 {
1223 // Get calibration file from file exchange servers
1224 // First queris the FXS database for the file name, using the run, detector, id and source info
1225 // then calls RetrieveFile(filename) for actual copy to local disk
1226 // run: current run being processed (given by Logbook entry fLogbookEntry)
1227 // detector: the Preprocessor name
1228 // id: provided as a parameter by the Preprocessor
1229 // source: provided by the Preprocessor through GetFileSources function
1230
1231         // check connection, in case connect
1232         if (!Connect(system))
1233         {
1234                 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
1235                 return 0;
1236         }
1237
1238         // Query preparation
1239         TString sqlQueryStart;
1240         TString whereClause;
1241         TString sourceName(source);
1242         Int_t nFields = 0;
1243         if (system == kDAQ)
1244         {
1245                 sqlQueryStart = Form("select filePath,size from %s where", fConfig->GetFXSdbTable(system));
1246                 whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\"",
1247                                 GetCurrentRun(), detector, id, source);
1248                 nFields = 2;
1249
1250         }
1251         else if (system == kDCS)
1252         {
1253                 sqlQueryStart = Form("select filePath,size from %s where", fConfig->GetFXSdbTable(system));
1254                 whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1255                                 GetCurrentRun(), detector, id);
1256                 nFields = 2;
1257                 sourceName="none";
1258         }
1259         else if (system == kHLT)
1260         {
1261                 sqlQueryStart = Form("select filePath,fileSize,fileChecksum from %s where",
1262                                                                                 fConfig->GetFXSdbTable(system));
1263                 whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\"",
1264                                 GetCurrentRun(), detector, id, source);
1265                 nFields = 3;
1266         }
1267
1268         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1269
1270         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1271
1272         // Query execution
1273         TSQLResult* aResult = 0;
1274         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
1275         if (!aResult) {
1276                 Log(detector, Form("GetFileName - Can't execute SQL query to %s database for: id = %s, source = %s",
1277                                 GetSystemName(system), id, sourceName.Data()));
1278                 return 0;
1279         }
1280
1281         if(aResult->GetRowCount() == 0)
1282         {
1283                 Log(detector,
1284                         Form("GetFileName - No entry in %s FXS db for: id = %s, source = %s",
1285                                 GetSystemName(system), id, sourceName.Data()));
1286                 delete aResult;
1287                 return 0;
1288         }
1289
1290         if (aResult->GetRowCount() > 1) {
1291                 Log(detector,
1292                         Form("GetFileName - More than one entry in %s FXS db for: id = %s, source = %s",
1293                                 GetSystemName(system), id, sourceName.Data()));
1294                 delete aResult;
1295                 return 0;
1296         }
1297
1298         if (aResult->GetFieldCount() != nFields) {
1299                 Log(detector,
1300                         Form("GetFileName - Wrong field count in %s FXS db for: id = %s, source = %s",
1301                                 GetSystemName(system), id, sourceName.Data()));
1302                 delete aResult;
1303                 return 0;
1304         }
1305
1306         TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
1307
1308         if (!aRow){
1309                 Log(detector, Form("GetFileName - Empty set result in %s FXS db from query: id = %s, source = %s",
1310                                 GetSystemName(system), id, sourceName.Data()));
1311                 delete aResult;
1312                 return 0;
1313         }
1314
1315         TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
1316         TString fileSize(aRow->GetField(1), aRow->GetFieldLength(1));
1317         TString fileMd5Sum;
1318         if(system == kHLT) fileMd5Sum = aRow->GetField(2);
1319
1320         delete aResult;
1321         delete aRow;
1322
1323         AliDebug(2, Form("filePath = %s",filePath.Data()));
1324
1325         // retrieved file is renamed to make it unique
1326         TString localFileName = Form("%s_%s_%d_%s_%s.shuttle",
1327                                         GetSystemName(system), detector, GetCurrentRun(), id, sourceName.Data());
1328
1329
1330         // file retrieval from FXS
1331         UInt_t nRetries = 0;
1332         UInt_t maxRetries = 3;
1333         Bool_t result = kFALSE;
1334
1335         // copy!! if successful TSystem::Exec returns 0
1336         while(nRetries++ < maxRetries) {
1337                 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
1338                 result = RetrieveFile(system, filePath.Data(), localFileName.Data());
1339                 if(!result)
1340                 {
1341                         Log(detector, Form("GetFileName - Copy of file %s from %s FXS failed",
1342                                         filePath.Data(), GetSystemName(system)));
1343                         continue;
1344                 } else {
1345                         AliInfo(Form("File %s copied from %s FXS into %s/%s",
1346                                                 filePath.Data(), GetSystemName(system),
1347                                                 GetShuttleTempDir(), localFileName.Data()));
1348                 }
1349
1350                 if (system == kHLT)
1351                 {
1352                         // compare md5sum of local file with the one stored in the FXS DB
1353                         Int_t md5Comp = gSystem->Exec(Form("md5sum %s/%s |grep %s 2>&1 > /dev/null",
1354                                                 GetShuttleTempDir(), localFileName.Data(), fileMd5Sum.Data()));
1355
1356                         if (md5Comp != 0)
1357                         {
1358                                 Log(detector, Form("GetFileName - md5sum of file %s does not match with local copy!",
1359                                                         filePath.Data()));
1360                                 result = kFALSE;
1361                                 continue;
1362                         }
1363                 }
1364                 if (result) break;
1365         }
1366
1367         if(!result) return 0;
1368
1369         fFXSCalled[system]=kTRUE;
1370         TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, sourceName.Data()));
1371         fFXSlist[system].Add(fileParams);
1372
1373         static TString fullLocalFileName;
1374         fullLocalFileName = TString::Format("%s/%s", GetShuttleTempDir(), localFileName.Data());
1375
1376         AliInfo(Form("fullLocalFileName = %s", fullLocalFileName.Data()));
1377
1378         return fullLocalFileName.Data();
1379
1380 }
1381
1382 //______________________________________________________________________________________________
1383 Bool_t AliShuttle::RetrieveFile(UInt_t system, const char* fxsFileName, const char* localFileName)
1384 {
1385 // Copies file from FXS to local Shuttle machine
1386
1387         // check temp directory: trying to cd to temp; if it does not exist, create it
1388         AliDebug(2, Form("Copy file %s from %s FXS into %s/%s",
1389                         GetSystemName(system), fxsFileName, GetShuttleTempDir(), localFileName));
1390
1391         void* dir = gSystem->OpenDirectory(GetShuttleTempDir());
1392         if (dir == NULL) {
1393                 if (gSystem->mkdir(GetShuttleTempDir(), kTRUE)) {
1394                         AliError(Form("Can't open directory <%s>", GetShuttleTempDir()));
1395                         return kFALSE;
1396                 }
1397
1398         } else {
1399                 gSystem->FreeDirectory(dir);
1400         }
1401
1402         TString baseFXSFolder;
1403         if (system == kDAQ)
1404         {
1405                 baseFXSFolder = "FES/";
1406         }
1407         else if (system == kDCS)
1408         {
1409                 baseFXSFolder = "";
1410         }
1411         else if (system == kHLT)
1412         {
1413                 baseFXSFolder = "~/";
1414         }
1415
1416
1417         TString command = Form("scp -oPort=%d -2 %s@%s:%s%s %s/%s",
1418                 fConfig->GetFXSPort(system),
1419                 fConfig->GetFXSUser(system),
1420                 fConfig->GetFXSHost(system),
1421                 baseFXSFolder.Data(),
1422                 fxsFileName,
1423                 GetShuttleTempDir(),
1424                 localFileName);
1425
1426         AliDebug(2, Form("%s",command.Data()));
1427
1428         Bool_t result = (gSystem->Exec(command.Data()) == 0);
1429
1430         return result;
1431 }
1432
1433 //______________________________________________________________________________________________
1434 TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
1435 {
1436 // Get sources producing the condition file Id from file exchange servers
1437
1438         if (system == kDCS)
1439         {
1440                 AliError("DCS system has only one source of data!");
1441                 return NULL;
1442
1443         }
1444
1445         // check connection, in case connect
1446         if (!Connect(system))
1447         {
1448                 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
1449                 return NULL;
1450         }
1451
1452         TString sourceName = 0;
1453         if (system == kDAQ)
1454         {
1455                 sourceName = "DAQsource";
1456         } else if (system == kHLT)
1457         {
1458                 sourceName = "DDLnumbers";
1459         }
1460
1461         TString sqlQueryStart = Form("select %s from %s where", sourceName.Data(), fConfig->GetFXSdbTable(kDAQ));
1462         TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1463                                 GetCurrentRun(), detector, id);
1464         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1465
1466         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1467
1468         // Query execution
1469         TSQLResult* aResult;
1470         aResult = fServer[system]->Query(sqlQuery);
1471         if (!aResult) {
1472                 Log(detector, Form("GetFileSources - Can't execute SQL query to %s database for id: %s",
1473                                 GetSystemName(system), id));
1474                 return 0;
1475         }
1476
1477         if (aResult->GetRowCount() == 0)
1478         {
1479                 Log(detector,
1480                         Form("GetFileSources - No entry in %s FXS table for id: %s", GetSystemName(system), id));
1481                 delete aResult;
1482                 return 0;
1483         }
1484
1485         TSQLRow* aRow;
1486         TList *list = new TList();
1487         list->SetOwner(1);
1488
1489         while ((aRow = aResult->Next()))
1490         {
1491
1492                 TString source(aRow->GetField(0), aRow->GetFieldLength(0));
1493                 AliDebug(2, Form("%s = %s", sourceName.Data(), source.Data()));
1494                 list->Add(new TObjString(source));
1495                 delete aRow;
1496         }
1497
1498         delete aResult;
1499
1500         return list;
1501 }
1502
1503 //______________________________________________________________________________________________
1504 Bool_t AliShuttle::Connect(Int_t system)
1505 {
1506 // Connect to MySQL Server of the system's FXS MySQL databases
1507 // DAQ Logbook, Shuttle Logbook and DAQ FXS db are on the same host
1508
1509         // check connection: if already connected return
1510         if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
1511
1512         TString dbHost, dbUser, dbPass, dbName;
1513
1514         if (system < 3) // FXS db servers
1515         {
1516                 dbHost = Form("mysql://%s:%d", fConfig->GetFXSdbHost(system), fConfig->GetFXSdbPort(system));
1517                 dbUser = fConfig->GetFXSdbUser(system);
1518                 dbPass = fConfig->GetFXSdbPass(system);
1519                 dbName =   fConfig->GetFXSdbName(system);
1520         } else { // Run & Shuttle logbook servers
1521         // TODO Will the Shuttle logbook server be the same as the Run logbook server ???
1522                 dbHost = Form("mysql://%s:%d", fConfig->GetDAQlbHost(), fConfig->GetDAQlbPort());
1523                 dbUser = fConfig->GetDAQlbUser();
1524                 dbPass = fConfig->GetDAQlbPass();
1525                 dbName =   fConfig->GetDAQlbDB();
1526         }
1527
1528         fServer[system] = TSQLServer::Connect(dbHost.Data(), dbUser.Data(), dbPass.Data());
1529         if (!fServer[system] || !fServer[system]->IsConnected()) {
1530                 if(system < 3)
1531                 {
1532                 AliError(Form("Can't establish connection to FXS database for %s",
1533                                         AliShuttleInterface::GetSystemName(system)));
1534                 } else {
1535                 AliError("Can't establish connection to Run logbook.");
1536                 }
1537                 if(fServer[system]) delete fServer[system];
1538                 return kFALSE;
1539         }
1540
1541         // Get tables
1542         TSQLResult* aResult=0;
1543         switch(system){
1544                 case kDAQ:
1545                         aResult = fServer[kDAQ]->GetTables(dbName.Data());
1546                         break;
1547                 case kDCS:
1548                         aResult = fServer[kDCS]->GetTables(dbName.Data());
1549                         break;
1550                 case kHLT:
1551                         aResult = fServer[kHLT]->GetTables(dbName.Data());
1552                         break;
1553                 default:
1554                         aResult = fServer[3]->GetTables(dbName.Data());
1555                         break;
1556         }
1557
1558         delete aResult;
1559         return kTRUE;
1560 }
1561
1562 //______________________________________________________________________________________________
1563 Bool_t AliShuttle::UpdateTable()
1564 {
1565 // Update FXS table filling time_processed field in all rows corresponding to current run and detector
1566
1567         Bool_t result = kTRUE;
1568
1569         for (UInt_t system=0; system<3; system++)
1570         {
1571                 if(!fFXSCalled[system]) continue;
1572
1573                 // check connection, in case connect
1574                 if (!Connect(system))
1575                 {
1576                         Log(fCurrentDetector, Form("UpdateTable - Couldn't connect to %s FXS database", GetSystemName(system)));
1577                         result = kFALSE;
1578                         continue;
1579                 }
1580
1581                 TTimeStamp now; // now
1582
1583                 // Loop on FXS list entries
1584                 TIter iter(&fFXSlist[system]);
1585                 TObjString *aFXSentry=0;
1586                 while ((aFXSentry = dynamic_cast<TObjString*> (iter.Next())))
1587                 {
1588                         TString aFXSentrystr = aFXSentry->String();
1589                         TObjArray *aFXSarray = aFXSentrystr.Tokenize("#!?!#");
1590                         if (!aFXSarray || aFXSarray->GetEntries() != 2 )
1591                         {
1592                                 Log(fCurrentDetector, Form("UpdateTable - error updating %s FXS entry. Check string: <%s>",
1593                                         GetSystemName(system), aFXSentrystr.Data()));
1594                                 if(aFXSarray) delete aFXSarray;
1595                                 result = kFALSE;
1596                                 continue;
1597                         }
1598                         const char* fileId = ((TObjString*) aFXSarray->At(0))->GetName();
1599                         const char* source = ((TObjString*) aFXSarray->At(1))->GetName();
1600
1601                         TString whereClause;
1602                         if (system == kDAQ)
1603                         {
1604                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
1605                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
1606                         }
1607                         else if (system == kDCS)
1608                         {
1609                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\";",
1610                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId);
1611                         }
1612                         else if (system == kHLT)
1613                         {
1614                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\";",
1615                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
1616                         }
1617
1618                         delete aFXSarray;
1619
1620                         TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
1621                                                                 now.GetSec(), whereClause.Data());
1622
1623                         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1624
1625                         // Query execution
1626                         TSQLResult* aResult;
1627                         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
1628                         if (!aResult)
1629                         {
1630                                 Log(fCurrentDetector, Form("UpdateTable - %s db: can't execute SQL query <%s>",
1631                                                                 GetSystemName(system), sqlQuery.Data()));
1632                                 result = kFALSE;
1633                                 continue;
1634                         }
1635                         delete aResult;
1636                 }
1637         }
1638
1639         return result;
1640 }
1641
1642 //______________________________________________________________________________________________
1643 Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
1644 {
1645         //
1646         // Update Shuttle logbook filling detector or shuttle_done column
1647         // ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
1648         //
1649
1650         // check connection, in case connect
1651         if(!Connect(3)){
1652                 Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook.");
1653                 return kFALSE;
1654         }
1655
1656         TString detName(detector);
1657         TString setClause;
1658         if(detName == "shuttle_done")
1659         {
1660                 setClause = "set shuttle_done=1";
1661
1662                 // Send the information to ML
1663                 TMonaLisaText  mlStatus("SHUTTLE_status", "Done");
1664
1665                 TList mlList;
1666                 mlList.Add(&mlStatus);
1667
1668                 fMonaLisa->SendParameters(&mlList);
1669         } else {
1670                 TString statusStr(status);
1671                 if(statusStr.Contains("done", TString::kIgnoreCase) ||
1672                    statusStr.Contains("failed", TString::kIgnoreCase)){
1673                         setClause = Form("set %s=\"%s\"", detector, status);
1674                 } else {
1675                         Log("SHUTTLE",
1676                                 Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s",
1677                                         status, detector));
1678                         return kFALSE;
1679                 }
1680         }
1681
1682         TString whereClause = Form("where run=%d", GetCurrentRun());
1683
1684         TString sqlQuery = Form("update logbook_shuttle %s %s",
1685                                         setClause.Data(), whereClause.Data());
1686
1687         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1688
1689         // Query execution
1690         TSQLResult* aResult;
1691         aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
1692         if (!aResult) {
1693                 Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data()));
1694                 return kFALSE;
1695         }
1696         delete aResult;
1697
1698         return kTRUE;
1699 }
1700
1701 //______________________________________________________________________________________________
1702 Int_t AliShuttle::GetCurrentRun() const
1703 {
1704 // Get current run from logbook entry
1705
1706         return fLogbookEntry ? fLogbookEntry->GetRun() : -1;
1707 }
1708
1709 //______________________________________________________________________________________________
1710 UInt_t AliShuttle::GetCurrentStartTime() const
1711 {
1712 // get current start time
1713
1714         return fLogbookEntry ? fLogbookEntry->GetStartTime() : 0;
1715 }
1716
1717 //______________________________________________________________________________________________
1718 UInt_t AliShuttle::GetCurrentEndTime() const
1719 {
1720 // get current end time from logbook entry
1721
1722         return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
1723 }
1724
1725 //______________________________________________________________________________________________
1726 void AliShuttle::Log(const char* detector, const char* message)
1727 {
1728 // Fill log string with a message
1729
1730         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
1731         if (dir == NULL) {
1732                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE)) {
1733                         AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
1734                         return;
1735                 }
1736
1737         } else {
1738                 gSystem->FreeDirectory(dir);
1739         }
1740
1741         TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
1742         if (GetCurrentRun() >= 0) 
1743                 toLog += Form("run %d - ", GetCurrentRun());
1744         toLog += Form("%s", message);
1745
1746         AliInfo(toLog.Data());
1747
1748         TString fileName;
1749         if (GetCurrentRun() >= 0) 
1750                 fileName.Form("%s/%s_%d.log", GetShuttleLogDir(), detector, GetCurrentRun());
1751         else
1752                 fileName.Form("%s/%s.log", GetShuttleLogDir(), detector);
1753         
1754         gSystem->ExpandPathName(fileName);
1755
1756         ofstream logFile;
1757         logFile.open(fileName, ofstream::out | ofstream::app);
1758
1759         if (!logFile.is_open()) {
1760                 AliError(Form("Could not open file %s", fileName.Data()));
1761                 return;
1762         }
1763
1764         logFile << toLog.Data() << "\n";
1765
1766         logFile.close();
1767 }
1768
1769 //______________________________________________________________________________________________
1770 Bool_t AliShuttle::Collect(Int_t run)
1771 {
1772 //
1773 // Collects conditions data for all UNPROCESSED run written to DAQ LogBook in case of run = -1 (default)
1774 // If a dedicated run is given this run is processed
1775 //
1776 // In operational mode, this is the Shuttle function triggered by the EOR signal.
1777 //
1778
1779         if (run == -1)
1780                 Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs");
1781         else
1782                 Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run));
1783
1784         SetLastAction("Starting");
1785
1786         TString whereClause("where shuttle_done=0");
1787         if (run != -1)
1788                 whereClause += Form(" and run=%d", run);
1789
1790         TObjArray shuttleLogbookEntries;
1791         if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries))
1792         {
1793                 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
1794                 return kFALSE;
1795         }
1796
1797         if (shuttleLogbookEntries.GetEntries() == 0)
1798         {
1799                 if (run == -1)
1800                         Log("SHUTTLE","Collect - Found no UNPROCESSED runs in Shuttle logbook");
1801                 else
1802                         Log("SHUTTLE", Form("Collect - Run %d is already DONE "
1803                                                 "or it does not exist in Shuttle logbook", run));
1804                 return kTRUE;
1805         }
1806
1807         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1808                 fFirstUnprocessed[iDet] = kTRUE;
1809
1810         if (run != -1)
1811         {
1812                 // query Shuttle logbook for earlier runs, check if some detectors are unprocessed,
1813                 // flag them into fFirstUnprocessed array
1814                 TString whereClause(Form("where shuttle_done=0 and run < %d", run));
1815                 TObjArray tmpLogbookEntries;
1816                 if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries))
1817                 {
1818                         Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
1819                         return kFALSE;
1820                 }
1821
1822                 TIter iter(&tmpLogbookEntries);
1823                 AliShuttleLogbookEntry* anEntry = 0;
1824                 while ((anEntry = dynamic_cast<AliShuttleLogbookEntry*> (iter.Next())))
1825                 {
1826                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1827                         {
1828                                 if (anEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
1829                                 {
1830                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
1831                                                         anEntry->GetRun(), GetDetName(iDet)));
1832                                         fFirstUnprocessed[iDet] = kFALSE;
1833                                 }
1834                         }
1835
1836                 }
1837
1838         }
1839
1840         if (!RetrieveConditionsData(shuttleLogbookEntries))
1841         {
1842                 Log("SHUTTLE", "Collect - Process of at least one run failed");
1843                 return kFALSE;
1844         }
1845
1846         Log("SHUTTLE", "Collect - Requested run(s) successfully processed");
1847         return kTRUE;
1848 }
1849
1850 //______________________________________________________________________________________________
1851 Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries)
1852 {
1853 // Retrieve conditions data for all runs that aren't processed yet
1854
1855         Bool_t hasError = kFALSE;
1856
1857         TIter iter(&dateEntries);
1858         AliShuttleLogbookEntry* anEntry;
1859
1860         while ((anEntry = (AliShuttleLogbookEntry*) iter.Next())){
1861                 if (!Process(anEntry)){
1862                         hasError = kTRUE;
1863                 }
1864
1865                 // clean SHUTTLE temp directory
1866                 TString command = Form("rm -f %s/*.shuttle", GetShuttleTempDir());
1867                 gSystem->Exec(command.Data());
1868         }
1869
1870         return hasError == kFALSE;
1871 }
1872
1873 //______________________________________________________________________________________________
1874 ULong_t AliShuttle::GetTimeOfLastAction() const
1875 {
1876         ULong_t tmp;
1877
1878         fMonitoringMutex->Lock();
1879
1880         tmp = fLastActionTime;
1881
1882         fMonitoringMutex->UnLock();
1883
1884         return tmp;
1885 }
1886
1887 //______________________________________________________________________________________________
1888 const TString AliShuttle::GetLastAction() const
1889 {
1890         // returns a string description of the last action
1891
1892         TString tmp;
1893
1894         fMonitoringMutex->Lock();
1895         
1896         tmp = fLastAction;
1897         
1898         fMonitoringMutex->UnLock();
1899
1900         return tmp;
1901 }
1902
1903 //______________________________________________________________________________________________
1904 void AliShuttle::SetLastAction(const char* action)
1905 {
1906         // updates the monitoring variables
1907
1908         fMonitoringMutex->Lock();
1909
1910         fLastAction = action;
1911         fLastActionTime = time(0);
1912         
1913         fMonitoringMutex->UnLock();
1914 }
1915
1916 //______________________________________________________________________________________________
1917 const char* AliShuttle::GetRunParameter(const char* param)
1918 {
1919 // returns run parameter read from DAQ logbook
1920
1921         if(!fLogbookEntry) {
1922                 AliError("No logbook entry!");
1923                 return 0;
1924         }
1925
1926         return fLogbookEntry->GetRunParameter(param);
1927 }
1928
1929 //______________________________________________________________________________________________
1930 Bool_t AliShuttle::SendMail()
1931 {
1932 // sends a mail to the subdetector expert in case of preprocessor error
1933
1934         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
1935         if (dir == NULL)
1936         {
1937                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
1938                 {
1939                         AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
1940                         return kFALSE;
1941                 }
1942
1943         } else {
1944                 gSystem->FreeDirectory(dir);
1945         }
1946
1947         TString bodyFileName;
1948         bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
1949         gSystem->ExpandPathName(bodyFileName);
1950
1951         ofstream mailBody;
1952         mailBody.open(bodyFileName, ofstream::out);
1953
1954         if (!mailBody.is_open())
1955         {
1956                 AliError(Form("Could not open mail body file %s", bodyFileName.Data()));
1957                 return kFALSE;
1958         }
1959
1960         TString to="";
1961         TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
1962         TObjString *anExpert=0;
1963         while ((anExpert = (TObjString*) iterExperts.Next()))
1964         {
1965                 to += Form("%s,", anExpert->GetName());
1966         }
1967         to.Remove(to.Length()-1);
1968         AliDebug(2, Form("to: %s",to.Data()));
1969
1970         // TODO this will be removed...
1971         if (to.Contains("not_yet_set")) {
1972                 AliInfo("List of detector responsibles not yet set!");
1973                 return kFALSE;
1974         }
1975
1976         TString cc="alberto.colla@cern.ch";
1977
1978         TString subject = Form("%s Shuttle preprocessor error in run %d !",
1979                                 fCurrentDetector.Data(), GetCurrentRun());
1980         AliDebug(2, Form("subject: %s", subject.Data()));
1981
1982         TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
1983         body += Form("SHUTTLE just detected that your preprocessor "
1984                         "exited with ERROR state in run %d!!\n\n", GetCurrentRun());
1985         body += Form("Please check %s status on the web page asap!\n\n", fCurrentDetector.Data());
1986         body += Form("The last 10 lines of %s log file are following:\n\n");
1987
1988         AliDebug(2, Form("Body begin: %s", body.Data()));
1989
1990         mailBody << body.Data();
1991         mailBody.close();
1992         mailBody.open(bodyFileName, ofstream::out | ofstream::app);
1993
1994         TString logFileName = Form("%s/%s_%d.log", GetShuttleLogDir(), fCurrentDetector.Data(), GetCurrentRun());
1995         TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
1996         if (gSystem->Exec(tailCommand.Data()))
1997         {
1998                 mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
1999         }
2000
2001         TString endBody = Form("------------------------------------------------------\n\n");
2002         endBody += Form("In case of problems please contact the SHUTTLE core team.\n\n");
2003         endBody += "Please do not answer this message directly, it is automatically generated.\n\n";
2004         endBody += "Sincerely yours,\n\n \t\t\tthe SHUTTLE\n";
2005
2006         AliDebug(2, Form("Body end: %s", endBody.Data()));
2007
2008         mailBody << endBody.Data();
2009
2010         mailBody.close();
2011
2012         // send mail!
2013         TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
2014                                                 subject.Data(),
2015                                                 cc.Data(),
2016                                                 to.Data(),
2017                                                 bodyFileName.Data());
2018         AliDebug(2, Form("mail command: %s", mailCommand.Data()));
2019
2020         Bool_t result = gSystem->Exec(mailCommand.Data());
2021
2022         return result == 0;
2023 }
2024
2025 //______________________________________________________________________________________________
2026 void AliShuttle::SetShuttleTempDir(const char* tmpDir)
2027 {
2028 // sets Shuttle temp directory
2029
2030         fgkShuttleTempDir = gSystem->ExpandPathName(tmpDir);
2031 }
2032
2033 //______________________________________________________________________________________________
2034 void AliShuttle::SetShuttleLogDir(const char* logDir)
2035 {
2036 // sets Shuttle log directory
2037
2038         fgkShuttleLogDir = gSystem->ExpandPathName(logDir);
2039 }