fd19c08e6371b48f745559253f7a7a4e54f8582e
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttle.cxx
1 /**************************************************************************
2  * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
3  *                                                                        *
4  * Author: The ALICE Off-line Project.                                    *
5  * Contributors are mentioned in the code where appropriate.              *
6  *                                                                        *
7  * Permission to use, copy, modify and distribute this software and its   *
8  * documentation strictly for non-commercial purposes is hereby granted   *
9  * without fee, provided that the above copyright notice appears in all   *
10  * copies and that both the copyright notice and this permission notice   *
11  * appear in the supporting documentation. The authors make no claims     *
12  * about the suitability of this software for any purpose. It is          *
13  * provided "as is" without express or implied warranty.                  *
14  **************************************************************************/
15
16 /*
17 $Log$
18 Revision 1.27  2007/01/30 17:52:42  jgrosseo
19 adding monalisa monitoring
20
21 Revision 1.26  2007/01/23 19:20:03  acolla
22 Removed old ldif files, added TOF, MCH ldif files. Added some options in
23 AliShuttleConfig::Print. Added in Ali Shuttle: SetShuttleTempDir and
24 SetShuttleLogDir
25
26 Revision 1.25  2007/01/15 19:13:52  acolla
27 Moved some AliInfo to AliDebug in SendMail function
28
29 Revision 1.21  2006/12/07 08:51:26  jgrosseo
30 update (alberto):
31 table, db names in ldap configuration
32 added GRP preprocessor
33 DCS data can also be retrieved by data point
34
35 Revision 1.20  2006/11/16 16:16:48  jgrosseo
36 introducing strict run ordering flag
37 removed giving preprocessor name to preprocessor, they have to know their name themselves ;-)
38
39 Revision 1.19  2006/11/06 14:23:04  jgrosseo
40 major update (Alberto)
41 o) reading of run parameters from the logbook
42 o) online offline naming conversion
43 o) standalone DCSclient package
44
45 Revision 1.18  2006/10/20 15:22:59  jgrosseo
46 o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
47 o) Merging Collect, CollectAll, CollectNew function
48 o) Removing implementation of empty copy constructors (declaration still there!)
49
50 Revision 1.17  2006/10/05 16:20:55  jgrosseo
51 adapting to new CDB classes
52
53 Revision 1.16  2006/10/05 15:46:26  jgrosseo
54 applying to the new interface
55
56 Revision 1.15  2006/10/02 16:38:39  jgrosseo
57 update (alberto):
58 fixed memory leaks
59 storing of objects that failed to be stored to the grid before
60 interfacing of shuttle status table in daq system
61
62 Revision 1.14  2006/08/29 09:16:05  jgrosseo
63 small update
64
65 Revision 1.13  2006/08/15 10:50:00  jgrosseo
66 effc++ corrections (alberto)
67
68 Revision 1.12  2006/08/08 14:19:29  jgrosseo
69 Update to shuttle classes (Alberto)
70
71 - Possibility to set the full object's path in the Preprocessor's and
72 Shuttle's  Store functions
73 - Possibility to extend the object's run validity in the same classes
74 ("startValidity" and "validityInfinite" parameters)
75 - Implementation of the StoreReferenceData function to store reference
76 data in a dedicated CDB storage.
77
78 Revision 1.11  2006/07/21 07:37:20  jgrosseo
79 last run is stored after each run
80
81 Revision 1.10  2006/07/20 09:54:40  jgrosseo
82 introducing status management: The processing per subdetector is divided into several steps,
83 after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
84 can keep track of the number of failures and skips further processing after a certain threshold is
85 exceeded. These thresholds can be configured in LDAP.
86
87 Revision 1.9  2006/07/19 10:09:55  jgrosseo
88 new configuration, accesst to DAQ FES (Alberto)
89
90 Revision 1.8  2006/07/11 12:44:36  jgrosseo
91 adding parameters for extended validity range of data produced by preprocessor
92
93 Revision 1.7  2006/07/10 14:37:09  jgrosseo
94 small fix + todo comment
95
96 Revision 1.6  2006/07/10 13:01:41  jgrosseo
97 enhanced storing of last sucessfully processed run (alberto)
98
99 Revision 1.5  2006/07/04 14:59:57  jgrosseo
100 revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
101
102 Revision 1.4  2006/06/12 09:11:16  jgrosseo
103 coding conventions (Alberto)
104
105 Revision 1.3  2006/06/06 14:26:40  jgrosseo
106 o) removed files that were moved to STEER
107 o) shuttle updated to follow the new interface (Alberto)
108
109 Revision 1.2  2006/03/07 07:52:34  hristov
110 New version (B.Yordanov)
111
112 Revision 1.6  2005/11/19 17:19:14  byordano
113 RetrieveDATEEntries and RetrieveConditionsData added
114
115 Revision 1.5  2005/11/19 11:09:27  byordano
116 AliShuttle declaration added
117
118 Revision 1.4  2005/11/17 17:47:34  byordano
119 TList changed to TObjArray
120
121 Revision 1.3  2005/11/17 14:43:23  byordano
122 import to local CVS
123
124 Revision 1.1.1.1  2005/10/28 07:33:58  hristov
125 Initial import as subdirectory in AliRoot
126
127 Revision 1.2  2005/09/13 08:41:15  byordano
128 default startTime endTime added
129
130 Revision 1.4  2005/08/30 09:13:02  byordano
131 some docs added
132
133 Revision 1.3  2005/08/29 21:15:47  byordano
134 some docs added
135
136 */
137
138 //
139 // This class is the main manager for AliShuttle.
140 // It organizes the data retrieval from DCS and calls the
141 // interface methods of AliPreprocessor.
142 // For every detector in AliShuttleConfig (see AliShuttleConfig),
143 // data for its set of aliases is retrieved. If there is a registered
144 // AliPreprocessor for this detector then it will be used
145 // according to the schema (see AliPreprocessor).
146 // If there isn't a registered AliPreprocessor then the retrieved
147 // data is stored automatically to the underlying AliCDBStorage.
148 // The alias name is used as detSpec.
149 //
150
151 #include "AliShuttle.h"
152
153 #include "AliCDBManager.h"
154 #include "AliCDBStorage.h"
155 #include "AliCDBId.h"
156 #include "AliCDBRunRange.h"
157 #include "AliCDBPath.h"
158 #include "AliCDBEntry.h"
159 #include "AliShuttleConfig.h"
160 #include "DCSClient/AliDCSClient.h"
161 #include "AliLog.h"
162 #include "AliPreprocessor.h"
163 #include "AliShuttleStatus.h"
164 #include "AliShuttleLogbookEntry.h"
165
166 #include <TSystem.h>
167 #include <TObject.h>
168 #include <TString.h>
169 #include <TTimeStamp.h>
170 #include <TObjString.h>
171 #include <TSQLServer.h>
172 #include <TSQLResult.h>
173 #include <TSQLRow.h>
174 #include <TMutex.h>
175
176 #include <TMonaLisaWriter.h>
177
178 #include <fstream>
179
180 #include <sys/types.h>
181 #include <sys/wait.h>
182
183 ClassImp(AliShuttle)
184
185 TString AliShuttle::fgkMainCDB("alien://folder=ShuttleCDB");
186 TString AliShuttle::fgkLocalCDB("local://LocalShuttleCDB");
187 TString AliShuttle::fgkMainRefStorage("alien://folder=ShuttleReference");
188 TString AliShuttle::fgkLocalRefStorage("local://LocalReferenceStorage");
189
190 Bool_t AliShuttle::fgkProcessDCS(kTRUE); 
191
192 TString AliShuttle::fgkShuttleTempDir = gSystem->ExpandPathName("$ALICE_ROOT/SHUTTLE/temp");
193 TString AliShuttle::fgkShuttleLogDir = gSystem->ExpandPathName("$ALICE_ROOT/SHUTTLE/log");
194
195 //______________________________________________________________________________________________
196 AliShuttle::AliShuttle(const AliShuttleConfig* config,
197                 UInt_t timeout, Int_t retries):
198 fConfig(config),
199 fTimeout(timeout), fRetries(retries),
200 fPreprocessorMap(),
201 fLogbookEntry(0),
202 fCurrentDetector(),
203 fStatusEntry(0),
204 fGridError(kFALSE),
205 fMonitoringMutex(0),
206 fLastActionTime(0),
207 fLastAction(),
208 fMonaLisa(0)
209 {
210         //
211         // config: AliShuttleConfig used
212         // timeout: timeout used for AliDCSClient connection
213         // retries: the number of retries in case of connection error.
214         //
215
216         if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
217         for(int iSys=0;iSys<4;iSys++) {
218                 fServer[iSys]=0;
219                 if (iSys < 3)
220                         fFXSlist[iSys].SetOwner(kTRUE);
221         }
222         fPreprocessorMap.SetOwner(kTRUE);
223
224         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
225                 fFirstUnprocessed[iDet] = kFALSE;
226
227         fMonitoringMutex = new TMutex();
228 }
229
230 //______________________________________________________________________________________________
231 AliShuttle::~AliShuttle()
232 {
233 // destructor
234
235         fPreprocessorMap.DeleteAll();
236         for(int iSys=0;iSys<4;iSys++)
237                 if(fServer[iSys]) {
238                         fServer[iSys]->Close();
239                         delete fServer[iSys];
240                         fServer[iSys] = 0;
241                 }
242
243         if (fStatusEntry){
244                 delete fStatusEntry;
245                 fStatusEntry = 0;
246         }
247         
248         if (fMonitoringMutex) 
249         {
250                 delete fMonitoringMutex;
251                 fMonitoringMutex = 0;
252         }
253 }
254
255 //______________________________________________________________________________________________
256 void AliShuttle::RegisterPreprocessor(AliPreprocessor* preprocessor)
257 {
258         //
259         // Registers new AliPreprocessor.
260         // It uses GetName() for indentificator of the pre processor.
261         // The pre processor is registered it there isn't any other
262         // with the same identificator (GetName()).
263         //
264
265         const char* detName = preprocessor->GetName();
266         if(GetDetPos(detName) < 0)
267                 AliFatal(Form("********** !!!!! Invalid detector name: %s !!!!! **********", detName));
268
269         if (fPreprocessorMap.GetValue(detName)) {
270                 AliWarning(Form("AliPreprocessor %s is already registered!", detName));
271                 return;
272         }
273
274         fPreprocessorMap.Add(new TObjString(detName), preprocessor);
275 }
276 //______________________________________________________________________________________________
277 UInt_t AliShuttle::Store(const AliCDBPath& path, TObject* object,
278                 AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite)
279 {
280   // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for
281   // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored
282   // using this function. Use StoreReferenceData instead!
283   // It calls WriteToCDB function which perform actual storage
284
285         return WriteToCDB(fgkMainCDB, fgkLocalCDB, path, object,
286                                 metaData, validityStart, validityInfinite);
287
288 }
289
290 //______________________________________________________________________________________________
291 UInt_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData)
292 {
293   // Stores a CDB object in the storage for reference data. This objects will not be available during
294   // offline reconstrunction. Use this function for reference data only!
295   // It calls WriteToCDB function which perform actual storage
296
297         return WriteToCDB(fgkMainRefStorage, fgkLocalRefStorage, path, object, metaData);
298
299 }
300
301 //______________________________________________________________________________________________
302 UInt_t AliShuttle::WriteToCDB(const char* mainUri, const char* localUri,
303                         const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
304                         Int_t validityStart, Bool_t validityInfinite)
305 {
306   // write object into the CDB. Parameters are passed by Store and StoreReferenceData functions.
307   // The parameters are:
308   //   1) Uri of the main storage (Grid)
309   //   2) Uri of the backup storage (Local)
310   //   3) the object's path.
311   //   4) the object to be stored
312   //   5) the metaData to be associated with the object
313   //   6) the validity start run number w.r.t. the current run,
314   //      if the data is valid only for this run leave the default 0
315   //   7) specifies if the calibration data is valid for infinity (this means until updated),
316   //      typical for calibration runs, the default is kFALSE
317   //
318   // returns 0 if fail
319   //         1 if stored in main (Grid) storage
320   //         2 if stored in backup (Local) storage
321
322         const char* cdbType = (mainUri == fgkMainCDB) ? "CDB" : "Reference";
323
324         Int_t firstRun = GetCurrentRun() - validityStart;
325         if(firstRun < 0) {
326                 AliError("First valid run happens to be less than 0! Setting it to 0.");
327                 firstRun=0;
328         }
329
330         Int_t lastRun = -1;
331         if(validityInfinite) {
332                 lastRun = AliCDBRunRange::Infinity();
333         } else {
334                 lastRun = GetCurrentRun();
335         }
336
337         AliCDBId id(path, firstRun, lastRun, -1, -1);
338
339         if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){
340                 TObjString runUsed = Form("%d", GetCurrentRun());
341                 metaData->SetProperty("RunUsed(TObjString)", runUsed.Clone());
342         }
343
344         UInt_t result = 0;
345
346         if (!(AliCDBManager::Instance()->GetStorage(mainUri))) {
347                 AliError(Form("WriteToCDB - Cannot activate main %s storage", cdbType));
348         } else {
349                 result = (UInt_t) AliCDBManager::Instance()->GetStorage(mainUri)
350                                         ->Put(object, id, metaData);
351         }
352
353         if(!result) {
354
355                 Log(fCurrentDetector,
356                         Form("WriteToCDB - Problem with main %s storage. Putting <%s> into backup storage",
357                                 cdbType, path.GetPath().Data()));
358
359                 // Set Grid version to current run number, to ease retrieval later
360                 id.SetVersion(GetCurrentRun());
361
362                 result = AliCDBManager::Instance()->GetStorage(localUri)
363                                         ->Put(object, id, metaData);
364
365                 if(result) {
366                         result = 2;
367                         fGridError = kTRUE;
368                 }else{
369                         Log(fCurrentDetector, "WriteToCDB - Can't store data!");
370                 }
371         }
372
373         return result;
374
375 }
376
377 //______________________________________________________________________________________________
378 AliShuttleStatus* AliShuttle::ReadShuttleStatus()
379 {
380 // Reads the AliShuttleStatus from the CDB
381
382         if (fStatusEntry){
383                 delete fStatusEntry;
384                 fStatusEntry = 0;
385         }
386
387         fStatusEntry = AliCDBManager::Instance()->GetStorage(AliShuttle::GetLocalCDB())
388                 ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
389
390         if (!fStatusEntry) return 0;
391         fStatusEntry->SetOwner(1);
392
393         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
394         if (!status) {
395                 AliError("Invalid object stored to CDB!");
396                 return 0;
397         }
398
399         return status;
400 }
401
402 //______________________________________________________________________________________________
403 Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status)
404 {
405 // writes the status for one subdetector
406
407         if (fStatusEntry){
408                 delete fStatusEntry;
409                 fStatusEntry = 0;
410         }
411
412         Int_t run = GetCurrentRun();
413
414         AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
415
416         fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
417         fStatusEntry->SetOwner(1);
418
419         UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
420
421         if (!result) {
422                 AliError(Form("WriteShuttleStatus for %s, run %d failed", fCurrentDetector.Data(), run));
423                 return kFALSE;
424         }
425         
426         SendMLInfo();
427
428         return kTRUE;
429 }
430
431 //______________________________________________________________________________________________
432 void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount)
433 {
434   // changes the AliShuttleStatus for the given detector and run to the given status
435
436         if (!fStatusEntry){
437                 AliError("UNEXPECTED: fStatusEntry empty");
438                 return;
439         }
440
441         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
442
443         if (!status){
444                 AliError("UNEXPECTED: status could not be read from current CDB entry");
445                 return;
446         }
447
448         TString actionStr = Form("UpdateShuttleStatus - %s: Changing state from %s to %s",
449                                 fCurrentDetector.Data(),
450                                 status->GetStatusName(),
451                                 status->GetStatusName(newStatus));
452         Log("SHUTTLE", actionStr);
453         SetLastAction(actionStr);
454
455         status->SetStatus(newStatus);
456         if (increaseCount) status->IncreaseCount();
457
458         AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
459
460         SendMLInfo();
461 }
462
463 //______________________________________________________________________________________________
464 void AliShuttle::SendMLInfo()
465 {
466         //
467         // sends ML information about the current status of the current detector being processed
468         //
469         
470         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
471         
472         if (!status){
473                 AliError("UNEXPECTED: status could not be read from current CDB entry");
474                 return;
475         }
476         
477         TMonaLisaText  mlStatus(Form("%s_status", fCurrentDetector.Data()), status->GetStatusName());
478         TMonaLisaValue mlRetryCount(Form("%s_count", fCurrentDetector.Data()), status->GetCount());
479
480         TList mlList;
481         mlList.Add(&mlStatus);
482         mlList.Add(&mlRetryCount);
483
484         fMonaLisa->SendParameters(&mlList);
485 }
486
487 //______________________________________________________________________________________________
488 Bool_t AliShuttle::ContinueProcessing()
489 {
490 // this function reads the AliShuttleStatus information from CDB and
491 // checks if the processing should be continued
492 // if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
493
494         if (!fConfig->HostProcessDetector(fCurrentDetector)) return kFALSE;
495
496         AliPreprocessor* aPreprocessor =
497                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
498         if (!aPreprocessor)
499         {
500                 AliInfo(Form("%s: no preprocessor registered", fCurrentDetector.Data()));
501                 return kFALSE;
502         }
503
504         AliShuttleLogbookEntry::Status entryStatus =
505                 fLogbookEntry->GetDetectorStatus(fCurrentDetector);
506
507         if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
508                 AliInfo(Form("ContinueProcessing - %s is %s",
509                                 fCurrentDetector.Data(),
510                                 fLogbookEntry->GetDetectorStatusName(entryStatus)));
511                 return kFALSE;
512         }
513
514         // if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state
515
516         // check if current run is first unprocessed run for current detector
517         if (fConfig->StrictRunOrder(fCurrentDetector) &&
518                 !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
519         {
520                 Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering but this is not the first unprocessed run!"));
521                 return kFALSE;
522         }
523
524         AliShuttleStatus* status = ReadShuttleStatus();
525         if (!status) {
526                 // first time
527                 Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
528                                 fCurrentDetector.Data()));
529                 status = new AliShuttleStatus(AliShuttleStatus::kStarted);
530                 return WriteShuttleStatus(status);
531         }
532
533         // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
534         // If it happens it may mean Logbook updating failed... let's do it now!
535         if (status->GetStatus() == AliShuttleStatus::kDone ||
536             status->GetStatus() == AliShuttleStatus::kFailed){
537                 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
538                                         fCurrentDetector.Data(),
539                                         status->GetStatusName(status->GetStatus())));
540                 UpdateShuttleLogbook(fCurrentDetector.Data(),
541                                         status->GetStatusName(status->GetStatus()));
542                 return kFALSE;
543         }
544
545         if (status->GetStatus() == AliShuttleStatus::kStoreFailed) {
546                 Log("SHUTTLE",
547                         Form("ContinueProcessing - %s: Grid storage of one or more objects failed. Trying again now",
548                                 fCurrentDetector.Data()));
549                 if(TryToStoreAgain()){
550                         Log(fCurrentDetector.Data(), "ContinueProcessing - All objects successfully stored into OCDB");
551                         UpdateShuttleStatus(AliShuttleStatus::kDone);
552                         UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE");
553                 } else {
554                         Log("SHUTTLE",
555                                 Form("ContinueProcessing - %s: Grid storage failed again",
556                                         fCurrentDetector.Data()));
557                         // trigger ML information manually because we do not had a status change
558                         SendMLInfo();
559                 }
560                 return kFALSE;
561         }
562
563         // if we get here, there is a restart
564         Bool_t cont = kFALSE;
565
566         // abort conditions
567         if (status->GetCount() >= fConfig->GetMaxRetries()) {
568                 Log("SHUTTLE", Form("ContinueProcessing - %s failed %d times in status %s - "
569                                 "Updating Shuttle Logbook", fCurrentDetector.Data(),
570                                 status->GetCount(), status->GetStatusName()));
571                 UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
572                 UpdateShuttleStatus(AliShuttleStatus::kFailed);
573         } else {
574                 Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. "
575                                 "Aborted before with %s. Retry number %d.", fCurrentDetector.Data(),
576                                 status->GetStatusName(), status->GetCount()));
577                 UpdateShuttleStatus(AliShuttleStatus::kStarted, kTRUE);
578                 cont = kTRUE;
579         }
580
581         // Send mail to detector expert!
582         AliInfo(Form("Sending mail to %s expert...", fCurrentDetector.Data()));
583         if (!SendMail())
584                 Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert",
585                                 fCurrentDetector.Data()));
586
587         return cont;
588 }
589
590 //______________________________________________________________________________________________
591 Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
592 {
593         //
594         // Makes data retrieval for all detectors in the configuration.
595         // entry: Shuttle logbook entry, contains run paramenters and status of detectors
596         // (Unprocessed, Inactive, Failed or Done).
597         // Returns kFALSE in case of error occured and kTRUE otherwise
598         //
599
600         if(!entry) return kFALSE;
601
602         fLogbookEntry = entry;
603
604         if (fLogbookEntry->IsDone())
605         {
606                 Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook");
607                 UpdateShuttleLogbook("shuttle_done");
608                 fLogbookEntry = 0;
609                 return kTRUE;
610         }
611
612         // create ML instance that monitors this run
613         fMonaLisa = new TMonaLisaWriter(Form("%d", GetCurrentRun()), "SHUTTLE", "aliendb1.cern.ch");
614         // disable monitoring of other parameters that come e.g. from TFile
615         gMonitoringWriter = 0;
616
617         AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^* \n",
618                                         GetCurrentRun()));
619
620
621         // Send the information to ML
622         TMonaLisaText  mlStatus("SHUTTLE_status", "Processing");
623
624         TList mlList;
625         mlList.Add(&mlStatus);
626
627         fMonaLisa->SendParameters(&mlList);
628                         
629         fLogbookEntry->Print("all");
630
631         // Initialization
632         Bool_t hasError = kFALSE;
633
634         AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
635         if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
636         AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
637         if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
638
639         // Loop on detectors in the configuration
640         TIter iter(fConfig->GetDetectors());
641         TObjString* aDetector = 0;
642
643         while ((aDetector = (TObjString*) iter.Next()))
644         {
645                 fCurrentDetector = aDetector->String();
646
647                 if (ContinueProcessing() == kFALSE) continue;
648
649                 AliInfo(Form("\n\n \t\t\t****** run %d - %s: START  ******",
650                                                 GetCurrentRun(), aDetector->GetName()));
651
652                 for(Int_t iSys=0;iSys<3;iSys++) fFXSCalled[iSys]=kFALSE;
653
654                 Log(fCurrentDetector.Data(), "Starting processing");
655
656                 Int_t pid = fork();
657
658                 if (pid < 0)
659                 {
660                         Log("SHUTTLE", "ERROR: Forking failed");
661                 }
662                 else if (pid > 0)
663                 {
664                         // parent
665                         AliInfo(Form("In parent process of %d - %s: Starting monitoring",
666                                                         GetCurrentRun(), aDetector->GetName()));
667
668                         Long_t begin = time(0);
669
670                         int status; // to be used with waitpid, on purpose an int (not Int_t)!
671                         while (waitpid(pid, &status, WNOHANG) == 0)
672                         {
673                                 Long_t expiredTime = time(0) - begin;
674
675                                 if (expiredTime > fConfig->GetPPTimeOut())
676                                 {
677                                         Log("SHUTTLE", Form("Process time out. Run time: %d seconds. Killing...",
678                                                                 expiredTime));
679
680                                         kill(pid, 9);
681
682                                         hasError = kTRUE;
683
684                                         gSystem->Sleep(1000);
685                                 }
686                                 else
687                                 {
688                                         if (expiredTime % 60 == 0)
689                                         Log("SHUTTLE", Form("Checked process. Run time: %d seconds.",
690                                                                 expiredTime));
691                                         gSystem->Sleep(1000);
692                                 }
693                         }
694
695                         AliInfo(Form("In parent process of %d - %s: Client has terminated.",
696                                                                 GetCurrentRun(), aDetector->GetName()));
697
698                         if (WIFEXITED(status))
699                         {
700                                 Int_t returnCode = WEXITSTATUS(status);
701
702                                 Log("SHUTTLE", Form("The return code is %d", returnCode));
703
704                                 if (returnCode != 0)
705                                 hasError = kTRUE;
706                         }
707                 }
708                 else if (pid == 0)
709                 {
710                         // client
711                         AliInfo(Form("In client process of %d - %s", GetCurrentRun(), aDetector->GetName()));
712
713                         UInt_t result = ProcessCurrentDetector();
714
715                         Int_t returnCode = 0; // will be set to 1 in case of an error
716
717                         if (!result)
718                         {
719                                 returnCode = 1;
720                                 AliInfo(Form("\n \t\t\t****** run %d - %s: PREPROCESSOR ERROR ****** \n\n",
721                                                         GetCurrentRun(), aDetector->GetName()));
722                         }
723                         else if (result == 2)
724                         {
725                                 AliInfo(Form("\n \t\t\t****** run %d - %s: STORAGE ERROR ****** \n\n",
726                                                         GetCurrentRun(), aDetector->GetName()));
727                         } else
728                         {
729                                 AliInfo(Form("\n \t\t\t****** run %d - %s: DONE ****** \n\n",
730                                                         GetCurrentRun(), aDetector->GetName()));
731                         }
732
733                         if (result > 0)
734                         {
735                                 // Process successful: Update time_processed field in FXS logbooks!
736                                 if (UpdateTable() == kFALSE) returnCode = 1;
737                         }
738
739                         AliInfo(Form("Client process of %d - %s is exiting now with %d.",
740                                                         GetCurrentRun(), aDetector->GetName(), returnCode));
741
742                         // the client exits here
743                         gSystem->Exit(returnCode);
744
745                         AliError("We should never get here!!!");
746                 }
747
748                 for (UInt_t iSys=0; iSys<3; iSys++)
749                 {
750                         if (fFXSCalled[iSys]) fFXSlist[iSys].Clear();
751                 }
752         }
753
754         AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^* \n",
755                                                         GetCurrentRun()));
756
757         //check if shuttle is done for this run, if so update logbook
758         TObjArray checkEntryArray;
759         checkEntryArray.SetOwner(1);
760         TString whereClause = Form("where run=%d", GetCurrentRun());
761         if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || checkEntryArray.GetEntries() == 0) {
762                 Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!",
763                                                 GetCurrentRun()));
764                 return hasError == kFALSE;
765         }
766
767         AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*>
768                                                 (checkEntryArray.At(0));
769
770         if (checkEntry)
771         {
772                 if (checkEntry->IsDone())
773                 {
774                         Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook");
775                         UpdateShuttleLogbook("shuttle_done");
776                 }
777                 else
778                 {
779                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
780                         {
781                                 if (checkEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
782                                 {
783                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
784                                                         checkEntry->GetRun(), GetDetName(iDet)));
785                                         fFirstUnprocessed[iDet] = kFALSE;
786                                 }
787                         }
788                 }
789         }
790
791         // remove ML instance
792         delete fMonaLisa;
793         fMonaLisa = 0;
794
795         fLogbookEntry = 0;
796
797         return hasError == kFALSE;
798 }
799
800 //______________________________________________________________________________________________
801 UInt_t AliShuttle::ProcessCurrentDetector()
802 {
803         //
804         // Makes data retrieval just for a specific detector (fCurrentDetector).
805         // Threre should be a configuration for this detector.
806
807         AliInfo(Form("Retrieving values for %s, run %d", fCurrentDetector.Data(), GetCurrentRun()));
808
809         UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
810
811         TMap dcsMap;
812         dcsMap.SetOwner(1);
813
814         Bool_t aDCSError = kFALSE;
815         fGridError = kFALSE;
816
817         // TODO Test only... I've added a flag that allows to
818         // exclude DCS archive DB query
819         if (!fgkProcessDCS)
820         {
821                 AliInfo("Skipping DCS processing!");
822                 aDCSError = kFALSE;
823         } else {
824                 TString host(fConfig->GetDCSHost(fCurrentDetector));
825                 Int_t port = fConfig->GetDCSPort(fCurrentDetector);
826
827                 // Retrieval of Aliases
828                 TObjString* anAlias = 0;
829                 Int_t iAlias = 1;
830                 Int_t nTotAliases= ((TMap*)fConfig->GetDCSAliases(fCurrentDetector))->GetEntries();
831                 TIter iterAliases(fConfig->GetDCSAliases(fCurrentDetector));
832                 while ((anAlias = (TObjString*) iterAliases.Next()))
833                 {
834                         TObjArray *valueSet = new TObjArray();
835                         valueSet->SetOwner(1);
836
837                         if (((iAlias-1) % 500) == 0 || iAlias == nTotAliases)
838                                 AliInfo(Form("Querying DCS archive: alias %s (%d of %d)",
839                                                 anAlias->GetName(), iAlias++, nTotAliases));
840                         aDCSError = (GetValueSet(host, port, anAlias->String(), valueSet, kAlias) == 0);
841
842                         if(!aDCSError)
843                         {
844                                 dcsMap.Add(anAlias->Clone(), valueSet);
845                         } else {
846                                 Log(fCurrentDetector,
847                                         Form("ProcessCurrentDetector - Error while retrieving alias %s",
848                                                 anAlias->GetName()));
849                                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
850                                 dcsMap.DeleteAll();
851                                 return 0;
852                         }
853                 }
854
855                 // Retrieval of Data Points
856                 TObjString* aDP = 0;
857                 Int_t iDP = 0;
858                 Int_t nTotDPs= ((TMap*)fConfig->GetDCSDataPoints(fCurrentDetector))->GetEntries();
859                 TIter iterDP(fConfig->GetDCSDataPoints(fCurrentDetector));
860                 while ((aDP = (TObjString*) iterDP.Next()))
861                 {
862                         TObjArray *valueSet = new TObjArray();
863                         valueSet->SetOwner(1);
864                         if (((iDP-1) % 500) == 0 || iDP == nTotDPs)
865                                 AliInfo(Form("Querying DCS archive: DP %s (%d of %d)",
866                                                 aDP->GetName(), iDP++, nTotDPs));
867                         aDCSError = (GetValueSet(host, port, aDP->String(), valueSet, kDP) == 0);
868
869                         if(!aDCSError)
870                         {
871                                 dcsMap.Add(aDP->Clone(), valueSet);
872                         } else {
873                                 Log(fCurrentDetector,
874                                         Form("ProcessCurrentDetector - Error while retrieving data point %s",
875                                                 aDP->GetName()));
876                                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
877                                 dcsMap.DeleteAll();
878                                 return 0;
879                         }
880                 }
881         }
882
883         // DCS Archive DB processing successful. Call Preprocessor!
884         UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
885
886         AliPreprocessor* aPreprocessor =
887                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
888
889         aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
890         UInt_t aPPResult = aPreprocessor->Process(&dcsMap);
891
892         UInt_t returnValue = 0;
893         if (aPPResult == 0) { // Preprocessor error
894                 UpdateShuttleStatus(AliShuttleStatus::kPPError);
895                 returnValue = 0;
896         } else if (fGridError == kFALSE) { // process and Grid storage ok!
897                 UpdateShuttleStatus(AliShuttleStatus::kDone);
898                 UpdateShuttleLogbook(fCurrentDetector, "DONE");
899                 Log(fCurrentDetector.Data(),
900                         "ProcessCurrentDetector - Preprocessor and Grid storage ended successfully");
901                 returnValue = 1;
902         } else { // Grid storage error (process ok, but object put in local storage)
903                 UpdateShuttleStatus(AliShuttleStatus::kStoreFailed);
904                 returnValue = 2;
905         }
906
907         dcsMap.DeleteAll();
908
909         return returnValue;
910 }
911
912 //______________________________________________________________________________________________
913 Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
914                 TObjArray& entries)
915 {
916 // Query DAQ's Shuttle logbook and fills detector status object.
917 // Call QueryRunParameters to query DAQ logbook for run parameters.
918
919         entries.SetOwner(1);
920
921         // check connection, in case connect
922         if(!Connect(3)) return kFALSE;
923
924         TString sqlQuery;
925         sqlQuery = Form("select * from logbook_shuttle %s order by run", whereClause);
926
927         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
928         if (!aResult) {
929                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
930                 return kFALSE;
931         }
932
933         AliDebug(2,Form("Query = %s", sqlQuery.Data()));
934
935         if(aResult->GetRowCount() == 0) {
936 //              if(sqlQuery.EndsWith("where shuttle_done=0 order by run")){
937 //                      Log("SHUTTLE", "QueryShuttleLogbook - All runs in Shuttle Logbook are already DONE");
938 //                      delete aResult;
939 //                      return kTRUE;
940 //              } else {
941                         AliInfo("No entries in Shuttle Logbook match request");
942                         delete aResult;
943                         return kTRUE;
944 //              }
945         }
946
947         // TODO Check field count!
948         const UInt_t nCols = 22;
949         if (aResult->GetFieldCount() != (Int_t) nCols) {
950                 AliError("Invalid SQL result field number!");
951                 delete aResult;
952                 return kFALSE;
953         }
954
955         TSQLRow* aRow;
956         while ((aRow = aResult->Next())) {
957                 TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
958                 Int_t run = runString.Atoi();
959
960                 AliShuttleLogbookEntry *entry = QueryRunParameters(run);
961                 if (!entry)
962                         continue;
963
964                 // loop on detectors
965                 for(UInt_t ii = 0; ii < nCols; ii++)
966                         entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii));
967
968                 entries.AddLast(entry);
969                 delete aRow;
970         }
971
972 //      if(sqlQuery.EndsWith("where shuttle_done=0 order by run"))
973 //              Log("SHUTTLE", Form("QueryShuttleLogbook - Found %d unprocessed runs in Shuttle Logbook",
974 //                                                      entries.GetEntriesFast()));
975         delete aResult;
976         return kTRUE;
977 }
978
979 //______________________________________________________________________________________________
980 AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
981 {
982         //
983         // Retrieve run parameters written in the DAQ logbook and sets them into AliShuttleLogbookEntry object
984         //
985
986         // check connection, in case connect
987         if (!Connect(3))
988                 return 0;
989
990         TString sqlQuery;
991         sqlQuery.Form("select * from %s where run=%d", fConfig->GetDAQlbTable(), run);
992
993         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
994         if (!aResult) {
995                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
996                 return 0;
997         }
998
999         if (aResult->GetRowCount() == 0) {
1000                 Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run));
1001                 delete aResult;
1002                 return 0;
1003         }
1004
1005         if (aResult->GetRowCount() > 1) {
1006                 AliError(Form("More than one entry in DAQ Logbook for run %d. Skipping", run));
1007                 delete aResult;
1008                 return 0;
1009         }
1010
1011         TSQLRow* aRow = aResult->Next();
1012         if (!aRow)
1013         {
1014                 AliError(Form("Could not retrieve row for run %d. Skipping", run));
1015                 delete aResult;
1016                 return 0;
1017         }
1018
1019         AliShuttleLogbookEntry* entry = new AliShuttleLogbookEntry(run);
1020
1021         for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
1022                 entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
1023
1024         UInt_t startTime = entry->GetStartTime();
1025         UInt_t endTime = entry->GetEndTime();
1026
1027         if (!startTime || !endTime || startTime > endTime) {
1028                 Log("SHUTTLE",
1029                         Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d",
1030                                 run, startTime, endTime));
1031                 delete entry;
1032                 delete aRow;
1033                 delete aResult;
1034                 return 0;
1035         }
1036
1037         delete aRow;
1038         delete aResult;
1039
1040         return entry;
1041 }
1042
1043 //______________________________________________________________________________________________
1044 Bool_t AliShuttle::TryToStoreAgain()
1045 {
1046   // Called in case the detector failed to store the object in Grid OCDB
1047   // It tries to store the object again, if it does not find more recent and overlapping objects
1048   // Calls underlying TryToStoreAgain(const char*) function twice, for OCDB and Reference storage.
1049
1050         AliInfo("Trying to store OCDB data again...");
1051         Bool_t resultCDB = TryToStoreAgain(fgkMainCDB);
1052
1053         AliInfo("Trying to store reference data again...");
1054         Bool_t resultRef = TryToStoreAgain(fgkMainRefStorage);
1055
1056         return resultCDB && resultRef;
1057 }
1058
1059 //______________________________________________________________________________________________
1060 Bool_t AliShuttle::TryToStoreAgain(TString& gridURI)
1061 {
1062   // Called by TryToStoreAgain(), performs actual storage retry
1063
1064         TObjArray* gridIds=0;
1065
1066         Bool_t result = kTRUE;
1067
1068         const char* type = 0;
1069         TString backupURI;
1070         if(gridURI == fgkMainCDB) {
1071                 type = "OCDB";
1072                 backupURI = fgkLocalCDB;
1073         } else if(gridURI == fgkMainRefStorage) {
1074                 type = "reference";
1075                 backupURI = fgkLocalRefStorage;
1076         } else {
1077                 AliError(Form("Invalid storage URI: %s", gridURI.Data()));
1078                 return kFALSE;
1079         }
1080
1081         AliCDBManager* man = AliCDBManager::Instance();
1082
1083         AliCDBStorage *gridSto = man->GetStorage(gridURI);
1084         if(!gridSto) {
1085                 Log(fCurrentDetector.Data(),
1086                         Form("TryToStoreAgain - cannot activate main %s storage", type));
1087                 return kFALSE;
1088         }
1089
1090         gridIds = gridSto->GetQueryCDBList();
1091
1092         // get objects previously stored in local CDB
1093         AliCDBStorage *backupSto = man->GetStorage(backupURI);
1094         AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*");
1095         // Local objects were stored with current run as Grid version!
1096         TList* localEntries = backupSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun());
1097         localEntries->SetOwner(1);
1098
1099         // loop on local stored objects
1100         TIter localIter(localEntries);
1101         AliCDBEntry *aLocEntry = 0;
1102         while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){
1103                 aLocEntry->SetOwner(1);
1104                 AliCDBId aLocId = aLocEntry->GetId();
1105                 aLocEntry->SetVersion(-1);
1106                 aLocEntry->SetSubVersion(-1);
1107
1108                 // loop on Grid valid Id's
1109                 Bool_t store = kTRUE;
1110                 TIter gridIter(gridIds);
1111                 AliCDBId* aGridId = 0;
1112                 while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
1113                         // If local object is valid up to infinity we store it only if it is
1114                         // the first unprocessed run!
1115                         if (aLocId.GetLastRun() == AliCDBRunRange::Infinity())
1116                         {
1117                                 if (!fFirstUnprocessed[GetDetPos(fCurrentDetector)])
1118                                 {
1119                                         Log(fCurrentDetector.Data(),
1120                                                 ("TryToStoreAgain - This object has validity infinite but "
1121                                                  "there are previous unprocessed runs!"));
1122                                         continue;
1123                                 } else {
1124                                         break;
1125                                 }
1126                         }
1127                         if(aGridId->GetPath() != aLocId.GetPath()) continue;
1128                         // skip all objects valid up to infinity
1129                         if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
1130                         // if we get here, it means there's already some more recent object stored on Grid!
1131                         store = kFALSE;
1132                         break;
1133                 }
1134
1135                 if(!store){
1136                         Log(fCurrentDetector.Data(),
1137                                 Form("TryToStoreAgain - A more recent object already exists in %s storage: <%s>",
1138                                         type, aGridId->ToString().Data()));
1139                         // removing local filename...
1140                         // TODO maybe it's better not to remove it, it was not copied to the Grid!
1141                         TString filename;
1142                         backupSto->IdToFilename(aLocId, filename);
1143                         AliInfo(Form("Removing local file %s", filename.Data()));
1144                         gSystem->Exec(Form("rm %s",filename.Data()));
1145                         continue;
1146                 }
1147
1148                 // If we get here, the file can be stored!
1149                 Bool_t storeOk = gridSto->Put(aLocEntry);
1150                 if(storeOk){
1151                         Log(fCurrentDetector.Data(),
1152                                 Form("TryToStoreAgain - Object <%s> successfully put into %s storage",
1153                                         aLocId.ToString().Data(), type));
1154
1155                         // removing local filename...
1156                         TString filename;
1157                         backupSto->IdToFilename(aLocId, filename);
1158                         AliInfo(Form("Removing local file %s", filename.Data()));
1159                         gSystem->Exec(Form("rm %s", filename.Data()));
1160                         continue;
1161                 } else  {
1162                         Log(fCurrentDetector.Data(),
1163                                 Form("TryToStoreAgain - Grid %s storage of object <%s> failed again",
1164                                         type, aLocId.ToString().Data()));
1165                         result = kFALSE;
1166                 }
1167         }
1168         localEntries->Clear();
1169
1170         return result;
1171 }
1172
1173 //______________________________________________________________________________________________
1174 Bool_t AliShuttle::GetValueSet(const char* host, Int_t port, const char* entry,
1175                                 TObjArray* valueSet, DCSType type)
1176 {
1177 // Retrieve all "entry" data points from the DCS server
1178 // host, port: TSocket connection parameters
1179 // entry: name of the alias or data point
1180 // valueSet: array of retrieved AliDCSValue's
1181 // type: kAlias or kDP
1182
1183         AliDCSClient client(host, port, fTimeout, fRetries);
1184         if (!client.IsConnected())
1185         {
1186                 return kFALSE;
1187         }
1188
1189         Int_t result=0;
1190
1191         if (type == kAlias)
1192         {
1193                 result = client.GetAliasValues(entry,
1194                         GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1195         } else
1196         if (type == kDP)
1197         {
1198                 result = client.GetDPValues(entry,
1199                         GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1200         }
1201
1202         if (result < 0)
1203         {
1204                 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get '%s'! Reason: %s",
1205                         entry, AliDCSClient::GetErrorString(result)));
1206
1207                 if (result == AliDCSClient::fgkServerError)
1208                 {
1209                         Log(fCurrentDetector.Data(), Form("GetValueSet - Server error: %s",
1210                                 client.GetServerError().Data()));
1211                 }
1212
1213                 return kFALSE;
1214         }
1215
1216         return kTRUE;
1217 }
1218
1219 //______________________________________________________________________________________________
1220 const char* AliShuttle::GetFile(Int_t system, const char* detector,
1221                 const char* id, const char* source)
1222 {
1223 // Get calibration file from file exchange servers
1224 // First queris the FXS database for the file name, using the run, detector, id and source info
1225 // then calls RetrieveFile(filename) for actual copy to local disk
1226 // run: current run being processed (given by Logbook entry fLogbookEntry)
1227 // detector: the Preprocessor name
1228 // id: provided as a parameter by the Preprocessor
1229 // source: provided by the Preprocessor through GetFileSources function
1230
1231         // check connection, in case connect
1232         if (!Connect(system))
1233         {
1234                 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
1235                 return 0;
1236         }
1237
1238         // Query preparation
1239         TString sqlQueryStart;
1240         TString whereClause;
1241         TString sourceName(source);
1242         Int_t nFields = 0;
1243         if (system == kDAQ)
1244         {
1245                 sqlQueryStart = Form("select filePath,size from %s where", fConfig->GetFXSdbTable(system));
1246                 whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\"",
1247                                 GetCurrentRun(), detector, id, source);
1248                 nFields = 2;
1249
1250         }
1251         else if (system == kDCS)
1252         {
1253                 sqlQueryStart = Form("select filePath,size from %s where", fConfig->GetFXSdbTable(system));
1254                 whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1255                                 GetCurrentRun(), detector, id);
1256                 nFields = 2;
1257                 sourceName="none";
1258         }
1259         else if (system == kHLT)
1260         {
1261                 sqlQueryStart = Form("select filePath,fileSize,fileChecksum from %s where",
1262                                                                                 fConfig->GetFXSdbTable(system));
1263                 whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\"",
1264                                 GetCurrentRun(), detector, id, source);
1265                 nFields = 3;
1266         }
1267
1268         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1269
1270         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1271
1272         // Query execution
1273         TSQLResult* aResult = 0;
1274         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
1275         if (!aResult) {
1276                 Log(detector, Form("GetFileName - Can't execute SQL query to %s database for: id = %s, source = %s",
1277                                 GetSystemName(system), id, sourceName.Data()));
1278                 return 0;
1279         }
1280
1281         if(aResult->GetRowCount() == 0)
1282         {
1283                 Log(detector,
1284                         Form("GetFileName - No entry in %s FXS db for: id = %s, source = %s",
1285                                 GetSystemName(system), id, sourceName.Data()));
1286                 delete aResult;
1287                 return 0;
1288         }
1289
1290         if (aResult->GetRowCount() > 1) {
1291                 Log(detector,
1292                         Form("GetFileName - More than one entry in %s FXS db for: id = %s, source = %s",
1293                                 GetSystemName(system), id, sourceName.Data()));
1294                 delete aResult;
1295                 return 0;
1296         }
1297
1298         if (aResult->GetFieldCount() != nFields) {
1299                 Log(detector,
1300                         Form("GetFileName - Wrong field count in %s FXS db for: id = %s, source = %s",
1301                                 GetSystemName(system), id, sourceName.Data()));
1302                 delete aResult;
1303                 return 0;
1304         }
1305
1306         TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
1307
1308         if (!aRow){
1309                 Log(detector, Form("GetFileName - Empty set result in %s FXS db from query: id = %s, source = %s",
1310                                 GetSystemName(system), id, sourceName.Data()));
1311                 delete aResult;
1312                 return 0;
1313         }
1314
1315         TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
1316         TString fileSize(aRow->GetField(1), aRow->GetFieldLength(1));
1317         TString fileMd5Sum;
1318         if(system == kHLT) fileMd5Sum = aRow->GetField(2);
1319
1320         delete aResult;
1321         delete aRow;
1322
1323         AliDebug(2, Form("filePath = %s",filePath.Data()));
1324
1325         // retrieved file is renamed to make it unique
1326         TString localFileName = Form("%s_%s_%d_%s_%s.shuttle",
1327                                         GetSystemName(system), detector, GetCurrentRun(), id, sourceName.Data());
1328
1329
1330         // file retrieval from FXS
1331         Bool_t result = RetrieveFile(system, filePath.Data(), localFileName.Data());
1332         if(!result)
1333         {
1334                 Log(detector, Form("GetFileName - Copy of file %s from %s FXS failed",
1335                                         filePath.Data(), GetSystemName(system)));
1336                 return 0;
1337         } else {
1338                 AliInfo(Form("File %s copied from %s FXS into %s/%s",
1339                         filePath.Data(), GetSystemName(system), GetShuttleTempDir(), localFileName.Data()));
1340         }
1341
1342         if (system == kHLT)
1343         {
1344                 // compare md5sum of local file with the one stored in the FXS DB
1345                 Int_t md5Comp = gSystem->Exec(Form("md5sum %s/%s |grep %s 2>&1 > /dev/null",
1346                                                 GetShuttleTempDir(), localFileName.Data(), fileMd5Sum.Data()));
1347
1348                 if (md5Comp != 0)
1349                 {
1350                         Log(detector, Form("GetFileName - md5sum of file %s does not match with local copy!",
1351                                                 filePath.Data()));
1352                         return 0;
1353                 }
1354         }
1355
1356         fFXSCalled[system]=kTRUE;
1357         TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, sourceName.Data()));
1358         fFXSlist[system].Add(fileParams);
1359
1360         static TString fullLocalFileName;
1361         fullLocalFileName = TString::Format("%s/%s", GetShuttleTempDir(), localFileName.Data());
1362
1363         AliInfo(Form("fullLocalFileName = %s", fullLocalFileName.Data()));
1364
1365         return fullLocalFileName.Data();
1366
1367 }
1368
1369 //______________________________________________________________________________________________
1370 Bool_t AliShuttle::RetrieveFile(UInt_t system, const char* fxsFileName, const char* localFileName)
1371 {
1372 // Copies file from FXS to local Shuttle machine
1373
1374         // check temp directory: trying to cd to temp; if it does not exist, create it
1375         AliDebug(2, Form("Copy file %s from %s FXS into %s/%s",
1376                         GetSystemName(system), fxsFileName, GetShuttleTempDir(), localFileName));
1377
1378         void* dir = gSystem->OpenDirectory(GetShuttleTempDir());
1379         if (dir == NULL) {
1380                 if (gSystem->mkdir(GetShuttleTempDir(), kTRUE)) {
1381                         AliError(Form("Can't open directory <%s>", GetShuttleTempDir()));
1382                         return kFALSE;
1383                 }
1384
1385         } else {
1386                 gSystem->FreeDirectory(dir);
1387         }
1388
1389         TString baseFXSFolder;
1390         if (system == kDAQ)
1391         {
1392                 baseFXSFolder = "FES/";
1393         }
1394         else if (system == kDCS)
1395         {
1396                 baseFXSFolder = "";
1397         }
1398         else if (system == kHLT)
1399         {
1400                 baseFXSFolder = "~/";
1401         }
1402
1403
1404         TString command = Form("scp -oPort=%d -2 %s@%s:%s%s %s/%s",
1405                 fConfig->GetFXSPort(system),
1406                 fConfig->GetFXSUser(system),
1407                 fConfig->GetFXSHost(system),
1408                 baseFXSFolder.Data(),
1409                 fxsFileName,
1410                 GetShuttleTempDir(),
1411                 localFileName);
1412
1413         AliDebug(2, Form("%s",command.Data()));
1414
1415         UInt_t nRetries = 0;
1416         UInt_t maxRetries = 3;
1417
1418         // copy!! if successful TSystem::Exec returns 0
1419         while(nRetries++ < maxRetries) {
1420                 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
1421                 if(gSystem->Exec(command.Data()) == 0) return kTRUE;
1422         }
1423
1424         return kFALSE;
1425 }
1426
1427 //______________________________________________________________________________________________
1428 TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
1429 {
1430 // Get sources producing the condition file Id from file exchange servers
1431
1432         if (system == kDCS)
1433         {
1434                 AliError("DCS system has only one source of data!");
1435                 return NULL;
1436
1437         }
1438
1439         // check connection, in case connect
1440         if (!Connect(system))
1441         {
1442                 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
1443                 return NULL;
1444         }
1445
1446         TString sourceName = 0;
1447         if (system == kDAQ)
1448         {
1449                 sourceName = "DAQsource";
1450         } else if (system == kHLT)
1451         {
1452                 sourceName = "DDLnumbers";
1453         }
1454
1455         TString sqlQueryStart = Form("select %s from %s where", sourceName.Data(), fConfig->GetFXSdbTable(kDAQ));
1456         TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1457                                 GetCurrentRun(), detector, id);
1458         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1459
1460         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1461
1462         // Query execution
1463         TSQLResult* aResult;
1464         aResult = fServer[system]->Query(sqlQuery);
1465         if (!aResult) {
1466                 Log(detector, Form("GetFileSources - Can't execute SQL query to %s database for id: %s",
1467                                 GetSystemName(system), id));
1468                 return 0;
1469         }
1470
1471         if (aResult->GetRowCount() == 0)
1472         {
1473                 Log(detector,
1474                         Form("GetFileSources - No entry in %s FXS table for id: %s", GetSystemName(system), id));
1475                 delete aResult;
1476                 return 0;
1477         }
1478
1479         TSQLRow* aRow;
1480         TList *list = new TList();
1481         list->SetOwner(1);
1482
1483         while ((aRow = aResult->Next()))
1484         {
1485
1486                 TString source(aRow->GetField(0), aRow->GetFieldLength(0));
1487                 AliDebug(2, Form("%s = %s", sourceName.Data(), source.Data()));
1488                 list->Add(new TObjString(source));
1489                 delete aRow;
1490         }
1491
1492         delete aResult;
1493
1494         return list;
1495 }
1496
1497 //______________________________________________________________________________________________
1498 Bool_t AliShuttle::Connect(Int_t system)
1499 {
1500 // Connect to MySQL Server of the system's FXS MySQL databases
1501 // DAQ Logbook, Shuttle Logbook and DAQ FXS db are on the same host
1502
1503         // check connection: if already connected return
1504         if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
1505
1506         TString dbHost, dbUser, dbPass, dbName;
1507
1508         if (system < 3) // FXS db servers
1509         {
1510                 dbHost = Form("mysql://%s:%d", fConfig->GetFXSdbHost(system), fConfig->GetFXSdbPort(system));
1511                 dbUser = fConfig->GetFXSdbUser(system);
1512                 dbPass = fConfig->GetFXSdbPass(system);
1513                 dbName =   fConfig->GetFXSdbName(system);
1514         } else { // Run & Shuttle logbook servers
1515         // TODO Will the Shuttle logbook server be the same as the Run logbook server ???
1516                 dbHost = Form("mysql://%s:%d", fConfig->GetDAQlbHost(), fConfig->GetDAQlbPort());
1517                 dbUser = fConfig->GetDAQlbUser();
1518                 dbPass = fConfig->GetDAQlbPass();
1519                 dbName =   fConfig->GetDAQlbDB();
1520         }
1521
1522         fServer[system] = TSQLServer::Connect(dbHost.Data(), dbUser.Data(), dbPass.Data());
1523         if (!fServer[system] || !fServer[system]->IsConnected()) {
1524                 if(system < 3)
1525                 {
1526                 AliError(Form("Can't establish connection to FXS database for %s",
1527                                         AliShuttleInterface::GetSystemName(system)));
1528                 } else {
1529                 AliError("Can't establish connection to Run logbook.");
1530                 }
1531                 if(fServer[system]) delete fServer[system];
1532                 return kFALSE;
1533         }
1534
1535         // Get tables
1536         TSQLResult* aResult=0;
1537         switch(system){
1538                 case kDAQ:
1539                         aResult = fServer[kDAQ]->GetTables(dbName.Data());
1540                         break;
1541                 case kDCS:
1542                         aResult = fServer[kDCS]->GetTables(dbName.Data());
1543                         break;
1544                 case kHLT:
1545                         aResult = fServer[kHLT]->GetTables(dbName.Data());
1546                         break;
1547                 default:
1548                         aResult = fServer[3]->GetTables(dbName.Data());
1549                         break;
1550         }
1551
1552         delete aResult;
1553         return kTRUE;
1554 }
1555
1556 //______________________________________________________________________________________________
1557 Bool_t AliShuttle::UpdateTable()
1558 {
1559 // Update FXS table filling time_processed field in all rows corresponding to current run and detector
1560
1561         Bool_t result = kTRUE;
1562
1563         for (UInt_t system=0; system<3; system++)
1564         {
1565                 if(!fFXSCalled[system]) continue;
1566
1567                 // check connection, in case connect
1568                 if (!Connect(system))
1569                 {
1570                         Log(fCurrentDetector, Form("UpdateTable - Couldn't connect to %s FXS database", GetSystemName(system)));
1571                         result = kFALSE;
1572                         continue;
1573                 }
1574
1575                 TTimeStamp now; // now
1576
1577                 // Loop on FXS list entries
1578                 TIter iter(&fFXSlist[system]);
1579                 TObjString *aFXSentry=0;
1580                 while ((aFXSentry = dynamic_cast<TObjString*> (iter.Next())))
1581                 {
1582                         TString aFXSentrystr = aFXSentry->String();
1583                         TObjArray *aFXSarray = aFXSentrystr.Tokenize("#!?!#");
1584                         if (!aFXSarray || aFXSarray->GetEntries() != 2 )
1585                         {
1586                                 Log(fCurrentDetector, Form("UpdateTable - error updating %s FXS entry. Check string: <%s>",
1587                                         GetSystemName(system), aFXSentrystr.Data()));
1588                                 if(aFXSarray) delete aFXSarray;
1589                                 result = kFALSE;
1590                                 continue;
1591                         }
1592                         const char* fileId = ((TObjString*) aFXSarray->At(0))->GetName();
1593                         const char* source = ((TObjString*) aFXSarray->At(1))->GetName();
1594
1595                         TString whereClause;
1596                         if (system == kDAQ)
1597                         {
1598                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
1599                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
1600                         }
1601                         else if (system == kDCS)
1602                         {
1603                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\";",
1604                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId);
1605                         }
1606                         else if (system == kHLT)
1607                         {
1608                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\";",
1609                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
1610                         }
1611
1612                         delete aFXSarray;
1613
1614                         TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
1615                                                                 now.GetSec(), whereClause.Data());
1616
1617                         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1618
1619                         // Query execution
1620                         TSQLResult* aResult;
1621                         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
1622                         if (!aResult)
1623                         {
1624                                 Log(fCurrentDetector, Form("UpdateTable - %s db: can't execute SQL query <%s>",
1625                                                                 GetSystemName(system), sqlQuery.Data()));
1626                                 result = kFALSE;
1627                                 continue;
1628                         }
1629                         delete aResult;
1630                 }
1631         }
1632
1633         return result;
1634 }
1635
1636 //______________________________________________________________________________________________
1637 Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
1638 {
1639         //
1640         // Update Shuttle logbook filling detector or shuttle_done column
1641         // ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
1642         //
1643
1644         // check connection, in case connect
1645         if(!Connect(3)){
1646                 Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook.");
1647                 return kFALSE;
1648         }
1649
1650         TString detName(detector);
1651         TString setClause;
1652         if(detName == "shuttle_done")
1653         {
1654                 setClause = "set shuttle_done=1";
1655
1656                 // Send the information to ML
1657                 TMonaLisaText  mlStatus("SHUTTLE_status", "Done");
1658
1659                 TList mlList;
1660                 mlList.Add(&mlStatus);
1661
1662                 fMonaLisa->SendParameters(&mlList);
1663         } else {
1664                 TString statusStr(status);
1665                 if(statusStr.Contains("done", TString::kIgnoreCase) ||
1666                    statusStr.Contains("failed", TString::kIgnoreCase)){
1667                         setClause = Form("set %s=\"%s\"", detector, status);
1668                 } else {
1669                         Log("SHUTTLE",
1670                                 Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s",
1671                                         status, detector));
1672                         return kFALSE;
1673                 }
1674         }
1675
1676         TString whereClause = Form("where run=%d", GetCurrentRun());
1677
1678         TString sqlQuery = Form("update logbook_shuttle %s %s",
1679                                         setClause.Data(), whereClause.Data());
1680
1681         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1682
1683         // Query execution
1684         TSQLResult* aResult;
1685         aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
1686         if (!aResult) {
1687                 Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data()));
1688                 return kFALSE;
1689         }
1690         delete aResult;
1691
1692         return kTRUE;
1693 }
1694
1695 //______________________________________________________________________________________________
1696 Int_t AliShuttle::GetCurrentRun() const
1697 {
1698 // Get current run from logbook entry
1699
1700         return fLogbookEntry ? fLogbookEntry->GetRun() : -1;
1701 }
1702
1703 //______________________________________________________________________________________________
1704 UInt_t AliShuttle::GetCurrentStartTime() const
1705 {
1706 // get current start time
1707
1708         return fLogbookEntry ? fLogbookEntry->GetStartTime() : 0;
1709 }
1710
1711 //______________________________________________________________________________________________
1712 UInt_t AliShuttle::GetCurrentEndTime() const
1713 {
1714 // get current end time from logbook entry
1715
1716         return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
1717 }
1718
1719 //______________________________________________________________________________________________
1720 void AliShuttle::Log(const char* detector, const char* message)
1721 {
1722 // Fill log string with a message
1723
1724         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
1725         if (dir == NULL) {
1726                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE)) {
1727                         AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
1728                         return;
1729                 }
1730
1731         } else {
1732                 gSystem->FreeDirectory(dir);
1733         }
1734
1735         TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
1736         if (GetCurrentRun() >= 0) 
1737                 toLog += Form("run %d - ", GetCurrentRun());
1738         toLog += Form("%s", message);
1739
1740         AliInfo(toLog.Data());
1741
1742         TString fileName;
1743         if (GetCurrentRun() >= 0) 
1744                 fileName.Form("%s/%s_%d.log", GetShuttleLogDir(), detector, GetCurrentRun());
1745         else
1746                 fileName.Form("%s/%s.log", GetShuttleLogDir(), detector);
1747         
1748         gSystem->ExpandPathName(fileName);
1749
1750         ofstream logFile;
1751         logFile.open(fileName, ofstream::out | ofstream::app);
1752
1753         if (!logFile.is_open()) {
1754                 AliError(Form("Could not open file %s", fileName.Data()));
1755                 return;
1756         }
1757
1758         logFile << toLog.Data() << "\n";
1759
1760         logFile.close();
1761 }
1762
1763 //______________________________________________________________________________________________
1764 Bool_t AliShuttle::Collect(Int_t run)
1765 {
1766 //
1767 // Collects conditions data for all UNPROCESSED run written to DAQ LogBook in case of run = -1 (default)
1768 // If a dedicated run is given this run is processed
1769 //
1770 // In operational mode, this is the Shuttle function triggered by the EOR signal.
1771 //
1772
1773         if (run == -1)
1774                 Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs");
1775         else
1776                 Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run));
1777
1778         SetLastAction("Starting");
1779
1780         TString whereClause("where shuttle_done=0");
1781         if (run != -1)
1782                 whereClause += Form(" and run=%d", run);
1783
1784         TObjArray shuttleLogbookEntries;
1785         if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries))
1786         {
1787                 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
1788                 return kFALSE;
1789         }
1790
1791         if (shuttleLogbookEntries.GetEntries() == 0)
1792         {
1793                 if (run == -1)
1794                         Log("SHUTTLE","Collect - Found no UNPROCESSED runs in Shuttle logbook");
1795                 else
1796                         Log("SHUTTLE", Form("Collect - Run %d is already DONE "
1797                                                 "or it does not exist in Shuttle logbook", run));
1798                 return kTRUE;
1799         }
1800
1801         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1802                 fFirstUnprocessed[iDet] = kTRUE;
1803
1804         if (run != -1)
1805         {
1806                 // query Shuttle logbook for earlier runs, check if some detectors are unprocessed,
1807                 // flag them into fFirstUnprocessed array
1808                 TString whereClause(Form("where shuttle_done=0 and run < %d", run));
1809                 TObjArray tmpLogbookEntries;
1810                 if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries))
1811                 {
1812                         Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
1813                         return kFALSE;
1814                 }
1815
1816                 TIter iter(&tmpLogbookEntries);
1817                 AliShuttleLogbookEntry* anEntry = 0;
1818                 while ((anEntry = dynamic_cast<AliShuttleLogbookEntry*> (iter.Next())))
1819                 {
1820                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1821                         {
1822                                 if (anEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
1823                                 {
1824                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
1825                                                         anEntry->GetRun(), GetDetName(iDet)));
1826                                         fFirstUnprocessed[iDet] = kFALSE;
1827                                 }
1828                         }
1829
1830                 }
1831
1832         }
1833
1834         if (!RetrieveConditionsData(shuttleLogbookEntries))
1835         {
1836                 Log("SHUTTLE", "Collect - Process of at least one run failed");
1837                 return kFALSE;
1838         }
1839
1840         Log("SHUTTLE", "Collect - Requested run(s) successfully processed");
1841         return kTRUE;
1842 }
1843
1844 //______________________________________________________________________________________________
1845 Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries)
1846 {
1847 // Retrieve conditions data for all runs that aren't processed yet
1848
1849         Bool_t hasError = kFALSE;
1850
1851         TIter iter(&dateEntries);
1852         AliShuttleLogbookEntry* anEntry;
1853
1854         while ((anEntry = (AliShuttleLogbookEntry*) iter.Next())){
1855                 if (!Process(anEntry)){
1856                         hasError = kTRUE;
1857                 }
1858         }
1859
1860         return hasError == kFALSE;
1861 }
1862
1863 //______________________________________________________________________________________________
1864 ULong_t AliShuttle::GetTimeOfLastAction() const
1865 {
1866         ULong_t tmp;
1867
1868         fMonitoringMutex->Lock();
1869
1870         tmp = fLastActionTime;
1871
1872         fMonitoringMutex->UnLock();
1873
1874         return tmp;
1875 }
1876
1877 //______________________________________________________________________________________________
1878 const TString AliShuttle::GetLastAction() const
1879 {
1880         // returns a string description of the last action
1881
1882         TString tmp;
1883
1884         fMonitoringMutex->Lock();
1885         
1886         tmp = fLastAction;
1887         
1888         fMonitoringMutex->UnLock();
1889
1890         return tmp;
1891 }
1892
1893 //______________________________________________________________________________________________
1894 void AliShuttle::SetLastAction(const char* action)
1895 {
1896         // updates the monitoring variables
1897
1898         fMonitoringMutex->Lock();
1899
1900         fLastAction = action;
1901         fLastActionTime = time(0);
1902         
1903         fMonitoringMutex->UnLock();
1904 }
1905
1906 //______________________________________________________________________________________________
1907 const char* AliShuttle::GetRunParameter(const char* param)
1908 {
1909 // returns run parameter read from DAQ logbook
1910
1911         if(!fLogbookEntry) {
1912                 AliError("No logbook entry!");
1913                 return 0;
1914         }
1915
1916         return fLogbookEntry->GetRunParameter(param);
1917 }
1918
1919 //______________________________________________________________________________________________
1920 Bool_t AliShuttle::SendMail()
1921 {
1922 // sends a mail to the subdetector expert in case of preprocessor error
1923
1924         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
1925         if (dir == NULL)
1926         {
1927                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
1928                 {
1929                         AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
1930                         return kFALSE;
1931                 }
1932
1933         } else {
1934                 gSystem->FreeDirectory(dir);
1935         }
1936
1937         TString bodyFileName;
1938         bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
1939         gSystem->ExpandPathName(bodyFileName);
1940
1941         ofstream mailBody;
1942         mailBody.open(bodyFileName, ofstream::out);
1943
1944         if (!mailBody.is_open())
1945         {
1946                 AliError(Form("Could not open mail body file %s", bodyFileName.Data()));
1947                 return kFALSE;
1948         }
1949
1950         TString to="";
1951         TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
1952         TObjString *anExpert=0;
1953         while ((anExpert = (TObjString*) iterExperts.Next()))
1954         {
1955                 to += Form("%s,", anExpert->GetName());
1956         }
1957         to.Remove(to.Length()-1);
1958         AliDebug(2, Form("to: %s",to.Data()));
1959
1960         // TODO this will be removed...
1961         if (to.Contains("not_yet_set")) {
1962                 AliInfo("List of detector responsibles not yet set!");
1963                 return kFALSE;
1964         }
1965
1966         TString cc="alberto.colla@cern.ch";
1967
1968         TString subject = Form("%s Shuttle preprocessor error in run %d !",
1969                                 fCurrentDetector.Data(), GetCurrentRun());
1970         AliDebug(2, Form("subject: %s", subject.Data()));
1971
1972         TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
1973         body += Form("SHUTTLE just detected that your preprocessor "
1974                         "exited with ERROR state in run %d!!\n\n", GetCurrentRun());
1975         body += Form("Please check %s status on the web page asap!\n\n", fCurrentDetector.Data());
1976         body += Form("The last 10 lines of %s log file are following:\n\n");
1977
1978         AliDebug(2, Form("Body begin: %s", body.Data()));
1979
1980         mailBody << body.Data();
1981         mailBody.close();
1982         mailBody.open(bodyFileName, ofstream::out | ofstream::app);
1983
1984         TString logFileName = Form("%s/%s_%d.log", GetShuttleLogDir(), fCurrentDetector.Data(), GetCurrentRun());
1985         TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
1986         if (gSystem->Exec(tailCommand.Data()))
1987         {
1988                 mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
1989         }
1990
1991         TString endBody = Form("------------------------------------------------------\n\n");
1992         endBody += Form("In case of problems please contact the SHUTTLE core team.\n\n");
1993         endBody += "Please do not answer this message directly, it is automatically generated.\n\n";
1994         endBody += "Sincerely yours,\n\n \t\t\tthe SHUTTLE\n";
1995
1996         AliDebug(2, Form("Body end: %s", endBody.Data()));
1997
1998         mailBody << endBody.Data();
1999
2000         mailBody.close();
2001
2002         // send mail!
2003         TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
2004                                                 subject.Data(),
2005                                                 cc.Data(),
2006                                                 to.Data(),
2007                                                 bodyFileName.Data());
2008         AliDebug(2, Form("mail command: %s", mailCommand.Data()));
2009
2010         Bool_t result = gSystem->Exec(mailCommand.Data());
2011
2012         return result == 0;
2013 }
2014
2015 //______________________________________________________________________________________________
2016 void AliShuttle::SetShuttleTempDir(const char* tmpDir)
2017 {
2018 // sets Shuttle temp directory
2019
2020         fgkShuttleTempDir = gSystem->ExpandPathName(tmpDir);
2021 }
2022
2023 //______________________________________________________________________________________________
2024 void AliShuttle::SetShuttleLogDir(const char* logDir)
2025 {
2026 // sets Shuttle log directory
2027
2028         fgkShuttleLogDir = gSystem->ExpandPathName(logDir);
2029 }