bb3f99097061a0bb70b5bd5e27208fb70717c552
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttle.cxx
1 /**************************************************************************
2  * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
3  *                                                                        *
4  * Author: The ALICE Off-line Project.                                    *
5  * Contributors are mentioned in the code where appropriate.              *
6  *                                                                        *
7  * Permission to use, copy, modify and distribute this software and its   *
8  * documentation strictly for non-commercial purposes is hereby granted   *
9  * without fee, provided that the above copyright notice appears in all   *
10  * copies and that both the copyright notice and this permission notice   *
11  * appear in the supporting documentation. The authors make no claims     *
12  * about the suitability of this software for any purpose. It is          *
13  * provided "as is" without express or implied warranty.                  *
14  **************************************************************************/
15
16 /*
17 $Log$
18 Revision 1.27  2007/01/30 17:52:42  jgrosseo
19 adding monalisa monitoring
20
21 Revision 1.26  2007/01/23 19:20:03  acolla
22 Removed old ldif files, added TOF, MCH ldif files. Added some options in
23 AliShuttleConfig::Print. Added in Ali Shuttle: SetShuttleTempDir and
24 SetShuttleLogDir
25
26 Revision 1.25  2007/01/15 19:13:52  acolla
27 Moved some AliInfo to AliDebug in SendMail function
28
29 Revision 1.21  2006/12/07 08:51:26  jgrosseo
30 update (alberto):
31 table, db names in ldap configuration
32 added GRP preprocessor
33 DCS data can also be retrieved by data point
34
35 Revision 1.20  2006/11/16 16:16:48  jgrosseo
36 introducing strict run ordering flag
37 removed giving preprocessor name to preprocessor, they have to know their name themselves ;-)
38
39 Revision 1.19  2006/11/06 14:23:04  jgrosseo
40 major update (Alberto)
41 o) reading of run parameters from the logbook
42 o) online offline naming conversion
43 o) standalone DCSclient package
44
45 Revision 1.18  2006/10/20 15:22:59  jgrosseo
46 o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
47 o) Merging Collect, CollectAll, CollectNew function
48 o) Removing implementation of empty copy constructors (declaration still there!)
49
50 Revision 1.17  2006/10/05 16:20:55  jgrosseo
51 adapting to new CDB classes
52
53 Revision 1.16  2006/10/05 15:46:26  jgrosseo
54 applying to the new interface
55
56 Revision 1.15  2006/10/02 16:38:39  jgrosseo
57 update (alberto):
58 fixed memory leaks
59 storing of objects that failed to be stored to the grid before
60 interfacing of shuttle status table in daq system
61
62 Revision 1.14  2006/08/29 09:16:05  jgrosseo
63 small update
64
65 Revision 1.13  2006/08/15 10:50:00  jgrosseo
66 effc++ corrections (alberto)
67
68 Revision 1.12  2006/08/08 14:19:29  jgrosseo
69 Update to shuttle classes (Alberto)
70
71 - Possibility to set the full object's path in the Preprocessor's and
72 Shuttle's  Store functions
73 - Possibility to extend the object's run validity in the same classes
74 ("startValidity" and "validityInfinite" parameters)
75 - Implementation of the StoreReferenceData function to store reference
76 data in a dedicated CDB storage.
77
78 Revision 1.11  2006/07/21 07:37:20  jgrosseo
79 last run is stored after each run
80
81 Revision 1.10  2006/07/20 09:54:40  jgrosseo
82 introducing status management: The processing per subdetector is divided into several steps,
83 after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
84 can keep track of the number of failures and skips further processing after a certain threshold is
85 exceeded. These thresholds can be configured in LDAP.
86
87 Revision 1.9  2006/07/19 10:09:55  jgrosseo
88 new configuration, accesst to DAQ FES (Alberto)
89
90 Revision 1.8  2006/07/11 12:44:36  jgrosseo
91 adding parameters for extended validity range of data produced by preprocessor
92
93 Revision 1.7  2006/07/10 14:37:09  jgrosseo
94 small fix + todo comment
95
96 Revision 1.6  2006/07/10 13:01:41  jgrosseo
97 enhanced storing of last sucessfully processed run (alberto)
98
99 Revision 1.5  2006/07/04 14:59:57  jgrosseo
100 revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
101
102 Revision 1.4  2006/06/12 09:11:16  jgrosseo
103 coding conventions (Alberto)
104
105 Revision 1.3  2006/06/06 14:26:40  jgrosseo
106 o) removed files that were moved to STEER
107 o) shuttle updated to follow the new interface (Alberto)
108
109 Revision 1.2  2006/03/07 07:52:34  hristov
110 New version (B.Yordanov)
111
112 Revision 1.6  2005/11/19 17:19:14  byordano
113 RetrieveDATEEntries and RetrieveConditionsData added
114
115 Revision 1.5  2005/11/19 11:09:27  byordano
116 AliShuttle declaration added
117
118 Revision 1.4  2005/11/17 17:47:34  byordano
119 TList changed to TObjArray
120
121 Revision 1.3  2005/11/17 14:43:23  byordano
122 import to local CVS
123
124 Revision 1.1.1.1  2005/10/28 07:33:58  hristov
125 Initial import as subdirectory in AliRoot
126
127 Revision 1.2  2005/09/13 08:41:15  byordano
128 default startTime endTime added
129
130 Revision 1.4  2005/08/30 09:13:02  byordano
131 some docs added
132
133 Revision 1.3  2005/08/29 21:15:47  byordano
134 some docs added
135
136 */
137
//
// This class is the main manager for AliShuttle.
// It organizes the data retrieval from DCS and calls the
// interface methods of AliPreprocessor.
// For every detector in AliShuttleConfig (see AliShuttleConfig),
// data for its set of aliases is retrieved. If there is a registered
// AliPreprocessor for this detector then it will be used
// according to the schema (see AliPreprocessor).
// If there isn't a registered AliPreprocessor then the retrieved
// data is stored automatically to the underlying AliCDBStorage.
// The alias name is used as detSpec.
//
150
151 #include "AliShuttle.h"
152
153 #include "AliCDBManager.h"
154 #include "AliCDBStorage.h"
155 #include "AliCDBId.h"
156 #include "AliCDBRunRange.h"
157 #include "AliCDBPath.h"
158 #include "AliCDBEntry.h"
159 #include "AliShuttleConfig.h"
160 #include "DCSClient/AliDCSClient.h"
161 #include "AliLog.h"
162 #include "AliPreprocessor.h"
163 #include "AliShuttleStatus.h"
164 #include "AliShuttleLogbookEntry.h"
165
166 #include <TSystem.h>
167 #include <TObject.h>
168 #include <TString.h>
169 #include <TTimeStamp.h>
170 #include <TObjString.h>
171 #include <TSQLServer.h>
172 #include <TSQLResult.h>
173 #include <TSQLRow.h>
174 #include <TMutex.h>
175
176 #include <TMonaLisaWriter.h>
177
178 #include <fstream>
179
180 #include <sys/types.h>
181 #include <sys/wait.h>
182
183 ClassImp(AliShuttle)
184
185 Bool_t AliShuttle::fgkProcessDCS(kTRUE);
186
187 //______________________________________________________________________________________________
188 AliShuttle::AliShuttle(const AliShuttleConfig* config,
189                 UInt_t timeout, Int_t retries):
190 fConfig(config),
191 fTimeout(timeout), fRetries(retries),
192 fPreprocessorMap(),
193 fLogbookEntry(0),
194 fCurrentDetector(),
195 fStatusEntry(0),
196 fGridError(kFALSE),
197 fMonitoringMutex(0),
198 fLastActionTime(0),
199 fLastAction(),
200 fMonaLisa(0)
201 {
202         //
203         // config: AliShuttleConfig used
204         // timeout: timeout used for AliDCSClient connection
205         // retries: the number of retries in case of connection error.
206         //
207
208         if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
209         for(int iSys=0;iSys<4;iSys++) {
210                 fServer[iSys]=0;
211                 if (iSys < 3)
212                         fFXSlist[iSys].SetOwner(kTRUE);
213         }
214         fPreprocessorMap.SetOwner(kTRUE);
215
216         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
217                 fFirstUnprocessed[iDet] = kFALSE;
218
219         fMonitoringMutex = new TMutex();
220 }
221
222 //______________________________________________________________________________________________
223 AliShuttle::~AliShuttle()
224 {
225 // destructor
226
227         fPreprocessorMap.DeleteAll();
228         for(int iSys=0;iSys<4;iSys++)
229                 if(fServer[iSys]) {
230                         fServer[iSys]->Close();
231                         delete fServer[iSys];
232                         fServer[iSys] = 0;
233                 }
234
235         if (fStatusEntry){
236                 delete fStatusEntry;
237                 fStatusEntry = 0;
238         }
239         
240         if (fMonitoringMutex) 
241         {
242                 delete fMonitoringMutex;
243                 fMonitoringMutex = 0;
244         }
245 }
246
247 //______________________________________________________________________________________________
248 void AliShuttle::RegisterPreprocessor(AliPreprocessor* preprocessor)
249 {
250         //
251         // Registers new AliPreprocessor.
252         // It uses GetName() for indentificator of the pre processor.
253         // The pre processor is registered it there isn't any other
254         // with the same identificator (GetName()).
255         //
256
257         const char* detName = preprocessor->GetName();
258         if(GetDetPos(detName) < 0)
259                 AliFatal(Form("********** !!!!! Invalid detector name: %s !!!!! **********", detName));
260
261         if (fPreprocessorMap.GetValue(detName)) {
262                 AliWarning(Form("AliPreprocessor %s is already registered!", detName));
263                 return;
264         }
265
266         fPreprocessorMap.Add(new TObjString(detName), preprocessor);
267 }
268 //______________________________________________________________________________________________
269 UInt_t AliShuttle::Store(const AliCDBPath& path, TObject* object,
270                 AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite)
271 {
272   // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for
273   // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored
274   // using this function. Use StoreReferenceData instead!
275   // It calls WriteToCDB function which perform actual storage
276
277         return WriteToCDB(fgkMainCDB, fgkLocalCDB, path, object,
278                                 metaData, validityStart, validityInfinite);
279
280 }
281
282 //______________________________________________________________________________________________
283 UInt_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData)
284 {
285   // Stores a CDB object in the storage for reference data. This objects will not be available during
286   // offline reconstrunction. Use this function for reference data only!
287   // It calls WriteToCDB function which perform actual storage
288
289         return WriteToCDB(fgkMainRefStorage, fgkLocalRefStorage, path, object, metaData);
290
291 }
292
293 //______________________________________________________________________________________________
294 UInt_t AliShuttle::WriteToCDB(const char* mainUri, const char* localUri,
295                         const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
296                         Int_t validityStart, Bool_t validityInfinite)
297 {
298   // write object into the CDB. Parameters are passed by Store and StoreReferenceData functions.
299   // The parameters are:
300   //   1) Uri of the main storage (Grid)
301   //   2) Uri of the backup storage (Local)
302   //   3) the object's path.
303   //   4) the object to be stored
304   //   5) the metaData to be associated with the object
305   //   6) the validity start run number w.r.t. the current run,
306   //      if the data is valid only for this run leave the default 0
307   //   7) specifies if the calibration data is valid for infinity (this means until updated),
308   //      typical for calibration runs, the default is kFALSE
309   //
310   // returns 0 if fail
311   //         1 if stored in main (Grid) storage
312   //         2 if stored in backup (Local) storage
313
314         const char* cdbType = (mainUri == fgkMainCDB) ? "CDB" : "Reference";
315
316         Int_t firstRun = GetCurrentRun() - validityStart;
317         if(firstRun < 0) {
318                 AliError("First valid run happens to be less than 0! Setting it to 0.");
319                 firstRun=0;
320         }
321
322         Int_t lastRun = -1;
323         if(validityInfinite) {
324                 lastRun = AliCDBRunRange::Infinity();
325         } else {
326                 lastRun = GetCurrentRun();
327         }
328
329         AliCDBId id(path, firstRun, lastRun, -1, -1);
330
331         if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){
332                 TObjString runUsed = Form("%d", GetCurrentRun());
333                 metaData->SetProperty("RunUsed(TObjString)", runUsed.Clone());
334         }
335
336         UInt_t result = 0;
337
338         if (!(AliCDBManager::Instance()->GetStorage(mainUri))) {
339                 AliError(Form("WriteToCDB - Cannot activate main %s storage", cdbType));
340         } else {
341                 result = (UInt_t) AliCDBManager::Instance()->GetStorage(mainUri)
342                                         ->Put(object, id, metaData);
343         }
344
345         if(!result) {
346
347                 Log(fCurrentDetector,
348                         Form("WriteToCDB - Problem with main %s storage. Putting <%s> into backup storage",
349                                 cdbType, path.GetPath().Data()));
350
351                 // Set Grid version to current run number, to ease retrieval later
352                 id.SetVersion(GetCurrentRun());
353
354                 result = AliCDBManager::Instance()->GetStorage(localUri)
355                                         ->Put(object, id, metaData);
356
357                 if(result) {
358                         result = 2;
359                         fGridError = kTRUE;
360                 }else{
361                         Log(fCurrentDetector, "WriteToCDB - Can't store data!");
362                 }
363         }
364
365         return result;
366
367 }
368
369 //______________________________________________________________________________________________
370 AliShuttleStatus* AliShuttle::ReadShuttleStatus()
371 {
372 // Reads the AliShuttleStatus from the CDB
373
374         if (fStatusEntry){
375                 delete fStatusEntry;
376                 fStatusEntry = 0;
377         }
378
379         fStatusEntry = AliCDBManager::Instance()->GetStorage(GetLocalCDB())
380                 ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
381
382         if (!fStatusEntry) return 0;
383         fStatusEntry->SetOwner(1);
384
385         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
386         if (!status) {
387                 AliError("Invalid object stored to CDB!");
388                 return 0;
389         }
390
391         return status;
392 }
393
394 //______________________________________________________________________________________________
395 Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status)
396 {
397 // writes the status for one subdetector
398
399         if (fStatusEntry){
400                 delete fStatusEntry;
401                 fStatusEntry = 0;
402         }
403
404         Int_t run = GetCurrentRun();
405
406         AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
407
408         fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
409         fStatusEntry->SetOwner(1);
410
411         UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
412
413         if (!result) {
414                 AliError(Form("WriteShuttleStatus for %s, run %d failed", fCurrentDetector.Data(), run));
415                 return kFALSE;
416         }
417         
418         SendMLInfo();
419
420         return kTRUE;
421 }
422
423 //______________________________________________________________________________________________
424 void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount)
425 {
426   // changes the AliShuttleStatus for the given detector and run to the given status
427
428         if (!fStatusEntry){
429                 AliError("UNEXPECTED: fStatusEntry empty");
430                 return;
431         }
432
433         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
434
435         if (!status){
436                 AliError("UNEXPECTED: status could not be read from current CDB entry");
437                 return;
438         }
439
440         TString actionStr = Form("UpdateShuttleStatus - %s: Changing state from %s to %s",
441                                 fCurrentDetector.Data(),
442                                 status->GetStatusName(),
443                                 status->GetStatusName(newStatus));
444         Log("SHUTTLE", actionStr);
445         SetLastAction(actionStr);
446
447         status->SetStatus(newStatus);
448         if (increaseCount) status->IncreaseCount();
449
450         AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
451
452         SendMLInfo();
453 }
454
455 //______________________________________________________________________________________________
456 void AliShuttle::SendMLInfo()
457 {
458         //
459         // sends ML information about the current status of the current detector being processed
460         //
461         
462         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
463         
464         if (!status){
465                 AliError("UNEXPECTED: status could not be read from current CDB entry");
466                 return;
467         }
468         
469         TMonaLisaText  mlStatus(Form("%s_status", fCurrentDetector.Data()), status->GetStatusName());
470         TMonaLisaValue mlRetryCount(Form("%s_count", fCurrentDetector.Data()), status->GetCount());
471
472         TList mlList;
473         mlList.Add(&mlStatus);
474         mlList.Add(&mlRetryCount);
475
476         fMonaLisa->SendParameters(&mlList);
477 }
478
479 //______________________________________________________________________________________________
480 Bool_t AliShuttle::ContinueProcessing()
481 {
482 // this function reads the AliShuttleStatus information from CDB and
483 // checks if the processing should be continued
484 // if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
485
486         if (!fConfig->HostProcessDetector(fCurrentDetector)) return kFALSE;
487
488         AliPreprocessor* aPreprocessor =
489                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
490         if (!aPreprocessor)
491         {
492                 AliInfo(Form("%s: no preprocessor registered", fCurrentDetector.Data()));
493                 return kFALSE;
494         }
495
496         AliShuttleLogbookEntry::Status entryStatus =
497                 fLogbookEntry->GetDetectorStatus(fCurrentDetector);
498
499         if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
500                 AliInfo(Form("ContinueProcessing - %s is %s",
501                                 fCurrentDetector.Data(),
502                                 fLogbookEntry->GetDetectorStatusName(entryStatus)));
503                 return kFALSE;
504         }
505
506         // if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state
507
508         // check if current run is first unprocessed run for current detector
509         if (fConfig->StrictRunOrder(fCurrentDetector) &&
510                 !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
511         {
512                 Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering but this is not the first unprocessed run!"));
513                 return kFALSE;
514         }
515
516         AliShuttleStatus* status = ReadShuttleStatus();
517         if (!status) {
518                 // first time
519                 Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
520                                 fCurrentDetector.Data()));
521                 status = new AliShuttleStatus(AliShuttleStatus::kStarted);
522                 return WriteShuttleStatus(status);
523         }
524
525         // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
526         // If it happens it may mean Logbook updating failed... let's do it now!
527         if (status->GetStatus() == AliShuttleStatus::kDone ||
528             status->GetStatus() == AliShuttleStatus::kFailed){
529                 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
530                                         fCurrentDetector.Data(),
531                                         status->GetStatusName(status->GetStatus())));
532                 UpdateShuttleLogbook(fCurrentDetector.Data(),
533                                         status->GetStatusName(status->GetStatus()));
534                 return kFALSE;
535         }
536
537         if (status->GetStatus() == AliShuttleStatus::kStoreFailed) {
538                 Log("SHUTTLE",
539                         Form("ContinueProcessing - %s: Grid storage of one or more objects failed. Trying again now",
540                                 fCurrentDetector.Data()));
541                 if(TryToStoreAgain()){
542                         Log(fCurrentDetector.Data(), "ContinueProcessing - All objects successfully stored into OCDB");
543                         UpdateShuttleStatus(AliShuttleStatus::kDone);
544                         UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE");
545                 } else {
546                         Log("SHUTTLE",
547                                 Form("ContinueProcessing - %s: Grid storage failed again",
548                                         fCurrentDetector.Data()));
549                         // trigger ML information manually because we do not had a status change
550                         SendMLInfo();
551                 }
552                 return kFALSE;
553         }
554
555         // if we get here, there is a restart
556         Bool_t cont = kFALSE;
557
558         // abort conditions
559         if (status->GetCount() >= fConfig->GetMaxRetries()) {
560                 Log("SHUTTLE", Form("ContinueProcessing - %s failed %d times in status %s - "
561                                 "Updating Shuttle Logbook", fCurrentDetector.Data(),
562                                 status->GetCount(), status->GetStatusName()));
563                 UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
564                 UpdateShuttleStatus(AliShuttleStatus::kFailed);
565         } else {
566                 Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. "
567                                 "Aborted before with %s. Retry number %d.", fCurrentDetector.Data(),
568                                 status->GetStatusName(), status->GetCount()));
569                 UpdateShuttleStatus(AliShuttleStatus::kStarted, kTRUE);
570                 cont = kTRUE;
571         }
572
573         // Send mail to detector expert!
574         AliInfo(Form("Sending mail to %s expert...", fCurrentDetector.Data()));
575         if (!SendMail())
576                 Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert",
577                                 fCurrentDetector.Data()));
578
579         return cont;
580 }
581
582 //______________________________________________________________________________________________
583 Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
584 {
585         //
586         // Makes data retrieval for all detectors in the configuration.
587         // entry: Shuttle logbook entry, contains run paramenters and status of detectors
588         // (Unprocessed, Inactive, Failed or Done).
589         // Returns kFALSE in case of error occured and kTRUE otherwise
590         //
591
592         if(!entry) return kFALSE;
593
594         fLogbookEntry = entry;
595
596         if (fLogbookEntry->IsDone())
597         {
598                 Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook");
599                 UpdateShuttleLogbook("shuttle_done");
600                 fLogbookEntry = 0;
601                 return kTRUE;
602         }
603
604         // create ML instance that monitors this run
605         fMonaLisa = new TMonaLisaWriter(Form("%d", GetCurrentRun()), "SHUTTLE", "aliendb1.cern.ch");
606         // disable monitoring of other parameters that come e.g. from TFile
607         gMonitoringWriter = 0;
608
609         AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^* \n",
610                                         GetCurrentRun()));
611
612
613         // Send the information to ML
614         TMonaLisaText  mlStatus("SHUTTLE_status", "Processing");
615
616         TList mlList;
617         mlList.Add(&mlStatus);
618
619         fMonaLisa->SendParameters(&mlList);
620                         
621         fLogbookEntry->Print("all");
622
623         // Initialization
624         Bool_t hasError = kFALSE;
625
626         AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
627         if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
628         AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
629         if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
630
631         // Loop on detectors in the configuration
632         TIter iter(fConfig->GetDetectors());
633         TObjString* aDetector = 0;
634
635         while ((aDetector = (TObjString*) iter.Next()))
636         {
637                 fCurrentDetector = aDetector->String();
638
639                 if (ContinueProcessing() == kFALSE) continue;
640
641                 AliInfo(Form("\n\n \t\t\t****** run %d - %s: START  ******",
642                                                 GetCurrentRun(), aDetector->GetName()));
643
644                 for(Int_t iSys=0;iSys<3;iSys++) fFXSCalled[iSys]=kFALSE;
645
646                 Log(fCurrentDetector.Data(), "Starting processing");
647
648                 Int_t pid = fork();
649
650                 if (pid < 0)
651                 {
652                         Log("SHUTTLE", "ERROR: Forking failed");
653                 }
654                 else if (pid > 0)
655                 {
656                         // parent
657                         AliInfo(Form("In parent process of %d - %s: Starting monitoring",
658                                                         GetCurrentRun(), aDetector->GetName()));
659
660                         Long_t begin = time(0);
661
662                         int status; // to be used with waitpid, on purpose an int (not Int_t)!
663                         while (waitpid(pid, &status, WNOHANG) == 0)
664                         {
665                                 Long_t expiredTime = time(0) - begin;
666
667                                 if (expiredTime > fConfig->GetPPTimeOut())
668                                 {
669                                         Log("SHUTTLE", Form("Process time out. Run time: %d seconds. Killing...",
670                                                                 expiredTime));
671
672                                         kill(pid, 9);
673
674                                         hasError = kTRUE;
675
676                                         gSystem->Sleep(1000);
677                                 }
678                                 else
679                                 {
680                                         if (expiredTime % 60 == 0)
681                                         Log("SHUTTLE", Form("Checked process. Run time: %d seconds.",
682                                                                 expiredTime));
683                                         gSystem->Sleep(1000);
684                                 }
685                         }
686
687                         AliInfo(Form("In parent process of %d - %s: Client has terminated.",
688                                                                 GetCurrentRun(), aDetector->GetName()));
689
690                         if (WIFEXITED(status))
691                         {
692                                 Int_t returnCode = WEXITSTATUS(status);
693
694                                 Log("SHUTTLE", Form("The return code is %d", returnCode));
695
696                                 if (returnCode != 0)
697                                 hasError = kTRUE;
698                         }
699                 }
700                 else if (pid == 0)
701                 {
702                         // client
703                         AliInfo(Form("In client process of %d - %s", GetCurrentRun(), aDetector->GetName()));
704
705                         UInt_t result = ProcessCurrentDetector();
706
707                         Int_t returnCode = 0; // will be set to 1 in case of an error
708
709                         if (!result)
710                         {
711                                 returnCode = 1;
712                                 AliInfo(Form("\n \t\t\t****** run %d - %s: PREPROCESSOR ERROR ****** \n\n",
713                                                         GetCurrentRun(), aDetector->GetName()));
714                         }
715                         else if (result == 2)
716                         {
717                                 AliInfo(Form("\n \t\t\t****** run %d - %s: STORAGE ERROR ****** \n\n",
718                                                         GetCurrentRun(), aDetector->GetName()));
719                         } else
720                         {
721                                 AliInfo(Form("\n \t\t\t****** run %d - %s: DONE ****** \n\n",
722                                                         GetCurrentRun(), aDetector->GetName()));
723                         }
724
725                         if (result > 0)
726                         {
727                                 // Process successful: Update time_processed field in FXS logbooks!
728                                 if (UpdateTable() == kFALSE) returnCode = 1;
729                         }
730
731                         for (UInt_t iSys=0; iSys<3; iSys++)
732                         {
733                                 if (fFXSCalled[iSys]) fFXSlist[iSys].Clear();
734                         }
735
736                         AliInfo(Form("Client process of %d - %s is exiting now with %d.",
737                                                         GetCurrentRun(), aDetector->GetName(), returnCode));
738
739                         // the client exits here
740                         gSystem->Exit(returnCode);
741
742                         AliError("We should never get here!!!");
743                 }
744         }
745
746         AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^* \n",
747                                                         GetCurrentRun()));
748
749         //check if shuttle is done for this run, if so update logbook
750         TObjArray checkEntryArray;
751         checkEntryArray.SetOwner(1);
752         TString whereClause = Form("where run=%d", GetCurrentRun());
753         if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || checkEntryArray.GetEntries() == 0) {
754                 Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!",
755                                                 GetCurrentRun()));
756                 return hasError == kFALSE;
757         }
758
759         AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*>
760                                                 (checkEntryArray.At(0));
761
762         if (checkEntry)
763         {
764                 if (checkEntry->IsDone())
765                 {
766                         Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook");
767                         UpdateShuttleLogbook("shuttle_done");
768                 }
769                 else
770                 {
771                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
772                         {
773                                 if (checkEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
774                                 {
775                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
776                                                         checkEntry->GetRun(), GetDetName(iDet)));
777                                         fFirstUnprocessed[iDet] = kFALSE;
778                                 }
779                         }
780                 }
781         }
782
783         // remove ML instance
784         delete fMonaLisa;
785         fMonaLisa = 0;
786
787         fLogbookEntry = 0;
788
789         return hasError == kFALSE;
790 }
791
792 //______________________________________________________________________________________________
793 UInt_t AliShuttle::ProcessCurrentDetector()
794 {
795         //
796         // Makes data retrieval just for a specific detector (fCurrentDetector).
797         // Threre should be a configuration for this detector.
798
799         AliInfo(Form("Retrieving values for %s, run %d", fCurrentDetector.Data(), GetCurrentRun()));
800
801         UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
802
803         TMap dcsMap;
804         dcsMap.SetOwner(1);
805
806         Bool_t aDCSError = kFALSE;
807         fGridError = kFALSE;
808
809         // TODO Test only... I've added a flag that allows to
810         // exclude DCS archive DB query
811         if (!fgkProcessDCS)
812         {
813                 AliInfo("Skipping DCS processing!");
814                 aDCSError = kFALSE;
815         } else {
816                 TString host(fConfig->GetDCSHost(fCurrentDetector));
817                 Int_t port = fConfig->GetDCSPort(fCurrentDetector);
818
819                 // Retrieval of Aliases
820                 TObjString* anAlias = 0;
821                 Int_t iAlias = 1;
822                 Int_t nTotAliases= ((TMap*)fConfig->GetDCSAliases(fCurrentDetector))->GetEntries();
823                 TIter iterAliases(fConfig->GetDCSAliases(fCurrentDetector));
824                 while ((anAlias = (TObjString*) iterAliases.Next()))
825                 {
826                         TObjArray *valueSet = new TObjArray();
827                         valueSet->SetOwner(1);
828
829                         if (((iAlias-1) % 500) == 0 || iAlias == nTotAliases)
830                                 AliInfo(Form("Querying DCS archive: alias %s (%d of %d)",
831                                                 anAlias->GetName(), iAlias++, nTotAliases));
832                         aDCSError = (GetValueSet(host, port, anAlias->String(), valueSet, kAlias) == 0);
833
834                         if(!aDCSError)
835                         {
836                                 dcsMap.Add(anAlias->Clone(), valueSet);
837                         } else {
838                                 Log(fCurrentDetector,
839                                         Form("ProcessCurrentDetector - Error while retrieving alias %s",
840                                                 anAlias->GetName()));
841                                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
842                                 dcsMap.DeleteAll();
843                                 return 0;
844                         }
845                 }
846
847                 // Retrieval of Data Points
848                 TObjString* aDP = 0;
849                 Int_t iDP = 0;
850                 Int_t nTotDPs= ((TMap*)fConfig->GetDCSDataPoints(fCurrentDetector))->GetEntries();
851                 TIter iterDP(fConfig->GetDCSDataPoints(fCurrentDetector));
852                 while ((aDP = (TObjString*) iterDP.Next()))
853                 {
854                         TObjArray *valueSet = new TObjArray();
855                         valueSet->SetOwner(1);
856                         if (((iDP-1) % 500) == 0 || iDP == nTotDPs)
857                                 AliInfo(Form("Querying DCS archive: DP %s (%d of %d)",
858                                                 aDP->GetName(), iDP++, nTotDPs));
859                         aDCSError = (GetValueSet(host, port, aDP->String(), valueSet, kDP) == 0);
860
861                         if(!aDCSError)
862                         {
863                                 dcsMap.Add(aDP->Clone(), valueSet);
864                         } else {
865                                 Log(fCurrentDetector,
866                                         Form("ProcessCurrentDetector - Error while retrieving data point %s",
867                                                 aDP->GetName()));
868                                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
869                                 dcsMap.DeleteAll();
870                                 return 0;
871                         }
872                 }
873         }
874
875         // DCS Archive DB processing successful. Call Preprocessor!
876         UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
877
878         AliPreprocessor* aPreprocessor =
879                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
880
881         aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
882         UInt_t aPPResult = aPreprocessor->Process(&dcsMap);
883
884         UInt_t returnValue = 0;
885         if (aPPResult == 0) { // Preprocessor error
886                 UpdateShuttleStatus(AliShuttleStatus::kPPError);
887                 returnValue = 0;
888         } else if (fGridError == kFALSE) { // process and Grid storage ok!
889                 UpdateShuttleStatus(AliShuttleStatus::kDone);
890                 UpdateShuttleLogbook(fCurrentDetector, "DONE");
891                 Log(fCurrentDetector.Data(),
892                         "ProcessCurrentDetector - Preprocessor and Grid storage ended successfully");
893                 returnValue = 1;
894         } else { // Grid storage error (process ok, but object put in local storage)
895                 UpdateShuttleStatus(AliShuttleStatus::kStoreFailed);
896                 returnValue = 2;
897         }
898
899         dcsMap.DeleteAll();
900
901         return returnValue;
902 }
903
904 //______________________________________________________________________________________________
905 Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
906                 TObjArray& entries)
907 {
908 // Query DAQ's Shuttle logbook and fills detector status object.
909 // Call QueryRunParameters to query DAQ logbook for run parameters.
910
911         entries.SetOwner(1);
912
913         // check connection, in case connect
914         if(!Connect(3)) return kFALSE;
915
916         TString sqlQuery;
917         sqlQuery = Form("select * from logbook_shuttle %s order by run", whereClause);
918
919         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
920         if (!aResult) {
921                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
922                 return kFALSE;
923         }
924
925         AliDebug(2,Form("Query = %s", sqlQuery.Data()));
926
927         if(aResult->GetRowCount() == 0) {
928 //              if(sqlQuery.EndsWith("where shuttle_done=0 order by run")){
929 //                      Log("SHUTTLE", "QueryShuttleLogbook - All runs in Shuttle Logbook are already DONE");
930 //                      delete aResult;
931 //                      return kTRUE;
932 //              } else {
933                         AliInfo("No entries in Shuttle Logbook match request");
934                         delete aResult;
935                         return kTRUE;
936 //              }
937         }
938
939         // TODO Check field count!
940         const UInt_t nCols = 22;
941         if (aResult->GetFieldCount() != (Int_t) nCols) {
942                 AliError("Invalid SQL result field number!");
943                 delete aResult;
944                 return kFALSE;
945         }
946
947         TSQLRow* aRow;
948         while ((aRow = aResult->Next())) {
949                 TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
950                 Int_t run = runString.Atoi();
951
952                 AliShuttleLogbookEntry *entry = QueryRunParameters(run);
953                 if (!entry)
954                         continue;
955
956                 // loop on detectors
957                 for(UInt_t ii = 0; ii < nCols; ii++)
958                         entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii));
959
960                 entries.AddLast(entry);
961                 delete aRow;
962         }
963
964 //      if(sqlQuery.EndsWith("where shuttle_done=0 order by run"))
965 //              Log("SHUTTLE", Form("QueryShuttleLogbook - Found %d unprocessed runs in Shuttle Logbook",
966 //                                                      entries.GetEntriesFast()));
967         delete aResult;
968         return kTRUE;
969 }
970
971 //______________________________________________________________________________________________
972 AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
973 {
974         //
975         // Retrieve run parameters written in the DAQ logbook and sets them into AliShuttleLogbookEntry object
976         //
977
978         // check connection, in case connect
979         if (!Connect(3))
980                 return 0;
981
982         TString sqlQuery;
983         sqlQuery.Form("select * from %s where run=%d", fConfig->GetDAQlbTable(), run);
984
985         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
986         if (!aResult) {
987                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
988                 return 0;
989         }
990
991         if (aResult->GetRowCount() == 0) {
992                 Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run));
993                 delete aResult;
994                 return 0;
995         }
996
997         if (aResult->GetRowCount() > 1) {
998                 AliError(Form("More than one entry in DAQ Logbook for run %d. Skipping", run));
999                 delete aResult;
1000                 return 0;
1001         }
1002
1003         TSQLRow* aRow = aResult->Next();
1004         if (!aRow)
1005         {
1006                 AliError(Form("Could not retrieve row for run %d. Skipping", run));
1007                 delete aResult;
1008                 return 0;
1009         }
1010
1011         AliShuttleLogbookEntry* entry = new AliShuttleLogbookEntry(run);
1012
1013         for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
1014                 entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
1015
1016         UInt_t startTime = entry->GetStartTime();
1017         UInt_t endTime = entry->GetEndTime();
1018
1019         if (!startTime || !endTime || startTime > endTime) {
1020                 Log("SHUTTLE",
1021                         Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d",
1022                                 run, startTime, endTime));
1023                 delete entry;
1024                 delete aRow;
1025                 delete aResult;
1026                 return 0;
1027         }
1028
1029         delete aRow;
1030         delete aResult;
1031
1032         return entry;
1033 }
1034
1035 //______________________________________________________________________________________________
1036 Bool_t AliShuttle::TryToStoreAgain()
1037 {
1038   // Called in case the detector failed to store the object in Grid OCDB
1039   // It tries to store the object again, if it does not find more recent and overlapping objects
1040   // Calls underlying TryToStoreAgain(const char*) function twice, for OCDB and Reference storage.
1041
1042         AliInfo("Trying to store OCDB data again...");
1043         Bool_t resultCDB = TryToStoreAgain(fgkMainCDB);
1044
1045         AliInfo("Trying to store reference data again...");
1046         Bool_t resultRef = TryToStoreAgain(fgkMainRefStorage);
1047
1048         return resultCDB && resultRef;
1049 }
1050
1051 //______________________________________________________________________________________________
1052 Bool_t AliShuttle::TryToStoreAgain(TString& gridURI)
1053 {
1054   // Called by TryToStoreAgain(), performs actual storage retry
1055
1056         TObjArray* gridIds=0;
1057
1058         Bool_t result = kTRUE;
1059
1060         const char* type = 0;
1061         TString backupURI;
1062         if(gridURI == fgkMainCDB) {
1063                 type = "OCDB";
1064                 backupURI = fgkLocalCDB;
1065         } else if(gridURI == fgkMainRefStorage) {
1066                 type = "reference";
1067                 backupURI = fgkLocalRefStorage;
1068         } else {
1069                 AliError(Form("Invalid storage URI: %s", gridURI.Data()));
1070                 return kFALSE;
1071         }
1072
1073         AliCDBManager* man = AliCDBManager::Instance();
1074
1075         AliCDBStorage *gridSto = man->GetStorage(gridURI);
1076         if(!gridSto) {
1077                 Log(fCurrentDetector.Data(),
1078                         Form("TryToStoreAgain - cannot activate main %s storage", type));
1079                 return kFALSE;
1080         }
1081
1082         gridIds = gridSto->GetQueryCDBList();
1083
1084         // get objects previously stored in local CDB
1085         AliCDBStorage *backupSto = man->GetStorage(backupURI);
1086         AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*");
1087         // Local objects were stored with current run as Grid version!
1088         TList* localEntries = backupSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun());
1089         localEntries->SetOwner(1);
1090
1091         // loop on local stored objects
1092         TIter localIter(localEntries);
1093         AliCDBEntry *aLocEntry = 0;
1094         while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){
1095                 aLocEntry->SetOwner(1);
1096                 AliCDBId aLocId = aLocEntry->GetId();
1097                 aLocEntry->SetVersion(-1);
1098                 aLocEntry->SetSubVersion(-1);
1099
1100                 // loop on Grid valid Id's
1101                 Bool_t store = kTRUE;
1102                 TIter gridIter(gridIds);
1103                 AliCDBId* aGridId = 0;
1104                 while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
1105                         // If local object is valid up to infinity we store it only if it is
1106                         // the first unprocessed run!
1107                         if (aLocId.GetLastRun() == AliCDBRunRange::Infinity())
1108                         {
1109                                 if (!fFirstUnprocessed[GetDetPos(fCurrentDetector)])
1110                                 {
1111                                         Log(fCurrentDetector.Data(),
1112                                                 ("TryToStoreAgain - This object has validity infinite but "
1113                                                  "there are previous unprocessed runs!"));
1114                                         continue;
1115                                 } else {
1116                                         break;
1117                                 }
1118                         }
1119                         if(aGridId->GetPath() != aLocId.GetPath()) continue;
1120                         // skip all objects valid up to infinity
1121                         if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
1122                         // if we get here, it means there's already some more recent object stored on Grid!
1123                         store = kFALSE;
1124                         break;
1125                 }
1126
1127                 if(!store){
1128                         Log(fCurrentDetector.Data(),
1129                                 Form("TryToStoreAgain - A more recent object already exists in %s storage: <%s>",
1130                                         type, aGridId->ToString().Data()));
1131                         // removing local filename...
1132                         // TODO maybe it's better not to remove it, it was not copied to the Grid!
1133                         TString filename;
1134                         backupSto->IdToFilename(aLocId, filename);
1135                         AliInfo(Form("Removing local file %s", filename.Data()));
1136                         gSystem->Exec(Form("rm %s",filename.Data()));
1137                         continue;
1138                 }
1139
1140                 // If we get here, the file can be stored!
1141                 Bool_t storeOk = gridSto->Put(aLocEntry);
1142                 if(storeOk){
1143                         Log(fCurrentDetector.Data(),
1144                                 Form("TryToStoreAgain - Object <%s> successfully put into %s storage",
1145                                         aLocId.ToString().Data(), type));
1146
1147                         // removing local filename...
1148                         TString filename;
1149                         backupSto->IdToFilename(aLocId, filename);
1150                         AliInfo(Form("Removing local file %s", filename.Data()));
1151                         gSystem->Exec(Form("rm %s", filename.Data()));
1152                         continue;
1153                 } else  {
1154                         Log(fCurrentDetector.Data(),
1155                                 Form("TryToStoreAgain - Grid %s storage of object <%s> failed again",
1156                                         type, aLocId.ToString().Data()));
1157                         result = kFALSE;
1158                 }
1159         }
1160         localEntries->Clear();
1161
1162         return result;
1163 }
1164
1165 //______________________________________________________________________________________________
1166 Bool_t AliShuttle::GetValueSet(const char* host, Int_t port, const char* entry,
1167                                 TObjArray* valueSet, DCSType type)
1168 {
1169 // Retrieve all "entry" data points from the DCS server
1170 // host, port: TSocket connection parameters
1171 // entry: name of the alias or data point
1172 // valueSet: array of retrieved AliDCSValue's
1173 // type: kAlias or kDP
1174
1175         AliDCSClient client(host, port, fTimeout, fRetries);
1176         if (!client.IsConnected())
1177         {
1178                 return kFALSE;
1179         }
1180
1181         Int_t result=0;
1182
1183         if (type == kAlias)
1184         {
1185                 result = client.GetAliasValues(entry,
1186                         GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1187         } else
1188         if (type == kDP)
1189         {
1190                 result = client.GetDPValues(entry,
1191                         GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1192         }
1193
1194         if (result < 0)
1195         {
1196                 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get '%s'! Reason: %s",
1197                         entry, AliDCSClient::GetErrorString(result)));
1198
1199                 if (result == AliDCSClient::fgkServerError)
1200                 {
1201                         Log(fCurrentDetector.Data(), Form("GetValueSet - Server error: %s",
1202                                 client.GetServerError().Data()));
1203                 }
1204
1205                 return kFALSE;
1206         }
1207
1208         return kTRUE;
1209 }
1210
1211 //______________________________________________________________________________________________
1212 const char* AliShuttle::GetFile(Int_t system, const char* detector,
1213                 const char* id, const char* source)
1214 {
1215 // Get calibration file from file exchange servers
1216 // First queris the FXS database for the file name, using the run, detector, id and source info
1217 // then calls RetrieveFile(filename) for actual copy to local disk
1218 // run: current run being processed (given by Logbook entry fLogbookEntry)
1219 // detector: the Preprocessor name
1220 // id: provided as a parameter by the Preprocessor
1221 // source: provided by the Preprocessor through GetFileSources function
1222
1223         // check connection, in case connect
1224         if (!Connect(system))
1225         {
1226                 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
1227                 return 0;
1228         }
1229
1230         // Query preparation
1231         TString sqlQueryStart;
1232         TString whereClause;
1233         TString sourceName(source);
1234         Int_t nFields = 0;
1235         if (system == kDAQ)
1236         {
1237                 sqlQueryStart = Form("select filePath,size from %s where", fConfig->GetFXSdbTable(system));
1238                 whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\"",
1239                                 GetCurrentRun(), detector, id, source);
1240                 nFields = 2;
1241
1242         }
1243         else if (system == kDCS)
1244         {
1245                 sqlQueryStart = Form("select filePath,size from %s where", fConfig->GetFXSdbTable(system));
1246                 whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1247                                 GetCurrentRun(), detector, id);
1248                 nFields = 2;
1249                 sourceName="none";
1250         }
1251         else if (system == kHLT)
1252         {
1253                 sqlQueryStart = Form("select filePath,fileSize,fileChecksum from %s where",
1254                                                                                 fConfig->GetFXSdbTable(system));
1255                 whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\"",
1256                                 GetCurrentRun(), detector, id, source);
1257                 nFields = 3;
1258         }
1259
1260         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1261
1262         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1263
1264         // Query execution
1265         TSQLResult* aResult = 0;
1266         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
1267         if (!aResult) {
1268                 Log(detector, Form("GetFileName - Can't execute SQL query to %s database for: id = %s, source = %s",
1269                                 GetSystemName(system), id, sourceName.Data()));
1270                 return 0;
1271         }
1272
1273         if(aResult->GetRowCount() == 0)
1274         {
1275                 Log(detector,
1276                         Form("GetFileName - No entry in %s FXS db for: id = %s, source = %s",
1277                                 GetSystemName(system), id, sourceName.Data()));
1278                 delete aResult;
1279                 return 0;
1280         }
1281
1282         if (aResult->GetRowCount() > 1) {
1283                 Log(detector,
1284                         Form("GetFileName - More than one entry in %s FXS db for: id = %s, source = %s",
1285                                 GetSystemName(system), id, sourceName.Data()));
1286                 delete aResult;
1287                 return 0;
1288         }
1289
1290         if (aResult->GetFieldCount() != nFields) {
1291                 Log(detector,
1292                         Form("GetFileName - Wrong field count in %s FXS db for: id = %s, source = %s",
1293                                 GetSystemName(system), id, sourceName.Data()));
1294                 delete aResult;
1295                 return 0;
1296         }
1297
1298         TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
1299
1300         if (!aRow){
1301                 Log(detector, Form("GetFileName - Empty set result in %s FXS db from query: id = %s, source = %s",
1302                                 GetSystemName(system), id, sourceName.Data()));
1303                 delete aResult;
1304                 return 0;
1305         }
1306
1307         TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
1308         TString fileSize(aRow->GetField(1), aRow->GetFieldLength(1));
1309         TString fileMd5Sum;
1310         if(system == kHLT) fileMd5Sum = aRow->GetField(2);
1311
1312         delete aResult;
1313         delete aRow;
1314
1315         AliDebug(2, Form("filePath = %s",filePath.Data()));
1316
1317         // retrieved file is renamed to make it unique
1318         TString localFileName = Form("%s_%s_%d_%s_%s.shuttle",
1319                                         GetSystemName(system), detector, GetCurrentRun(), id, sourceName.Data());
1320
1321
1322         // file retrieval from FXS
1323         UInt_t nRetries = 0;
1324         UInt_t maxRetries = 3;
1325         Bool_t result = kFALSE;
1326
1327         // copy!! if successful TSystem::Exec returns 0
1328         while(nRetries++ < maxRetries) {
1329                 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
1330                 result = RetrieveFile(system, filePath.Data(), localFileName.Data());
1331                 if(!result)
1332                 {
1333                         Log(detector, Form("GetFileName - Copy of file %s from %s FXS failed",
1334                                         filePath.Data(), GetSystemName(system)));
1335                         continue;
1336                 } else {
1337                         AliInfo(Form("File %s copied from %s FXS into %s/%s",
1338                                                 filePath.Data(), GetSystemName(system),
1339                                                 GetShuttleTempDir(), localFileName.Data()));
1340                 }
1341
1342                 if (system == kHLT)
1343                 {
1344                         // compare md5sum of local file with the one stored in the FXS DB
1345                         Int_t md5Comp = gSystem->Exec(Form("md5sum %s/%s |grep %s 2>&1 > /dev/null",
1346                                                 GetShuttleTempDir(), localFileName.Data(), fileMd5Sum.Data()));
1347
1348                         if (md5Comp != 0)
1349                         {
1350                                 Log(detector, Form("GetFileName - md5sum of file %s does not match with local copy!",
1351                                                         filePath.Data()));
1352                                 result = kFALSE;
1353                                 continue;
1354                         }
1355                 }
1356                 if (result) break;
1357         }
1358
1359         if(!result) return 0;
1360
1361         fFXSCalled[system]=kTRUE;
1362         TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, sourceName.Data()));
1363         fFXSlist[system].Add(fileParams);
1364
1365         static TString fullLocalFileName;
1366         fullLocalFileName = TString::Format("%s/%s", GetShuttleTempDir(), localFileName.Data());
1367
1368         AliInfo(Form("fullLocalFileName = %s", fullLocalFileName.Data()));
1369
1370         return fullLocalFileName.Data();
1371
1372 }
1373
1374 //______________________________________________________________________________________________
1375 Bool_t AliShuttle::RetrieveFile(UInt_t system, const char* fxsFileName, const char* localFileName)
1376 {
1377 // Copies file from FXS to local Shuttle machine
1378
1379         // check temp directory: trying to cd to temp; if it does not exist, create it
1380         AliDebug(2, Form("Copy file %s from %s FXS into %s/%s",
1381                         GetSystemName(system), fxsFileName, GetShuttleTempDir(), localFileName));
1382
1383         void* dir = gSystem->OpenDirectory(GetShuttleTempDir());
1384         if (dir == NULL) {
1385                 if (gSystem->mkdir(GetShuttleTempDir(), kTRUE)) {
1386                         AliError(Form("Can't open directory <%s>", GetShuttleTempDir()));
1387                         return kFALSE;
1388                 }
1389
1390         } else {
1391                 gSystem->FreeDirectory(dir);
1392         }
1393
1394         TString baseFXSFolder;
1395         if (system == kDAQ)
1396         {
1397                 baseFXSFolder = "FES/";
1398         }
1399         else if (system == kDCS)
1400         {
1401                 baseFXSFolder = "";
1402         }
1403         else if (system == kHLT)
1404         {
1405                 baseFXSFolder = "~/";
1406         }
1407
1408
1409         TString command = Form("scp -oPort=%d -2 %s@%s:%s%s %s/%s",
1410                 fConfig->GetFXSPort(system),
1411                 fConfig->GetFXSUser(system),
1412                 fConfig->GetFXSHost(system),
1413                 baseFXSFolder.Data(),
1414                 fxsFileName,
1415                 GetShuttleTempDir(),
1416                 localFileName);
1417
1418         AliDebug(2, Form("%s",command.Data()));
1419
1420         Bool_t result = (gSystem->Exec(command.Data()) == 0);
1421
1422         return result;
1423 }
1424
1425 //______________________________________________________________________________________________
1426 TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
1427 {
1428 // Get sources producing the condition file Id from file exchange servers
1429
1430         if (system == kDCS)
1431         {
1432                 AliError("DCS system has only one source of data!");
1433                 return NULL;
1434
1435         }
1436
1437         // check connection, in case connect
1438         if (!Connect(system))
1439         {
1440                 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
1441                 return NULL;
1442         }
1443
1444         TString sourceName = 0;
1445         if (system == kDAQ)
1446         {
1447                 sourceName = "DAQsource";
1448         } else if (system == kHLT)
1449         {
1450                 sourceName = "DDLnumbers";
1451         }
1452
1453         TString sqlQueryStart = Form("select %s from %s where", sourceName.Data(), fConfig->GetFXSdbTable(kDAQ));
1454         TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1455                                 GetCurrentRun(), detector, id);
1456         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1457
1458         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1459
1460         // Query execution
1461         TSQLResult* aResult;
1462         aResult = fServer[system]->Query(sqlQuery);
1463         if (!aResult) {
1464                 Log(detector, Form("GetFileSources - Can't execute SQL query to %s database for id: %s",
1465                                 GetSystemName(system), id));
1466                 return 0;
1467         }
1468
1469         if (aResult->GetRowCount() == 0)
1470         {
1471                 Log(detector,
1472                         Form("GetFileSources - No entry in %s FXS table for id: %s", GetSystemName(system), id));
1473                 delete aResult;
1474                 return 0;
1475         }
1476
1477         TSQLRow* aRow;
1478         TList *list = new TList();
1479         list->SetOwner(1);
1480
1481         while ((aRow = aResult->Next()))
1482         {
1483
1484                 TString source(aRow->GetField(0), aRow->GetFieldLength(0));
1485                 AliDebug(2, Form("%s = %s", sourceName.Data(), source.Data()));
1486                 list->Add(new TObjString(source));
1487                 delete aRow;
1488         }
1489
1490         delete aResult;
1491
1492         return list;
1493 }
1494
1495 //______________________________________________________________________________________________
1496 Bool_t AliShuttle::Connect(Int_t system)
1497 {
1498 // Connect to MySQL Server of the system's FXS MySQL databases
1499 // DAQ Logbook, Shuttle Logbook and DAQ FXS db are on the same host
1500
1501         // check connection: if already connected return
1502         if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
1503
1504         TString dbHost, dbUser, dbPass, dbName;
1505
1506         if (system < 3) // FXS db servers
1507         {
1508                 dbHost = Form("mysql://%s:%d", fConfig->GetFXSdbHost(system), fConfig->GetFXSdbPort(system));
1509                 dbUser = fConfig->GetFXSdbUser(system);
1510                 dbPass = fConfig->GetFXSdbPass(system);
1511                 dbName =   fConfig->GetFXSdbName(system);
1512         } else { // Run & Shuttle logbook servers
1513         // TODO Will the Shuttle logbook server be the same as the Run logbook server ???
1514                 dbHost = Form("mysql://%s:%d", fConfig->GetDAQlbHost(), fConfig->GetDAQlbPort());
1515                 dbUser = fConfig->GetDAQlbUser();
1516                 dbPass = fConfig->GetDAQlbPass();
1517                 dbName =   fConfig->GetDAQlbDB();
1518         }
1519
1520         fServer[system] = TSQLServer::Connect(dbHost.Data(), dbUser.Data(), dbPass.Data());
1521         if (!fServer[system] || !fServer[system]->IsConnected()) {
1522                 if(system < 3)
1523                 {
1524                 AliError(Form("Can't establish connection to FXS database for %s",
1525                                         AliShuttleInterface::GetSystemName(system)));
1526                 } else {
1527                 AliError("Can't establish connection to Run logbook.");
1528                 }
1529                 if(fServer[system]) delete fServer[system];
1530                 return kFALSE;
1531         }
1532
1533         // Get tables
1534         TSQLResult* aResult=0;
1535         switch(system){
1536                 case kDAQ:
1537                         aResult = fServer[kDAQ]->GetTables(dbName.Data());
1538                         break;
1539                 case kDCS:
1540                         aResult = fServer[kDCS]->GetTables(dbName.Data());
1541                         break;
1542                 case kHLT:
1543                         aResult = fServer[kHLT]->GetTables(dbName.Data());
1544                         break;
1545                 default:
1546                         aResult = fServer[3]->GetTables(dbName.Data());
1547                         break;
1548         }
1549
1550         delete aResult;
1551         return kTRUE;
1552 }
1553
1554 //______________________________________________________________________________________________
1555 Bool_t AliShuttle::UpdateTable()
1556 {
1557 // Update FXS table filling time_processed field in all rows corresponding to current run and detector
1558
1559         Bool_t result = kTRUE;
1560
1561         for (UInt_t system=0; system<3; system++)
1562         {
1563                 if(!fFXSCalled[system]) continue;
1564
1565                 // check connection, in case connect
1566                 if (!Connect(system))
1567                 {
1568                         Log(fCurrentDetector, Form("UpdateTable - Couldn't connect to %s FXS database", GetSystemName(system)));
1569                         result = kFALSE;
1570                         continue;
1571                 }
1572
1573                 TTimeStamp now; // now
1574
1575                 // Loop on FXS list entries
1576                 TIter iter(&fFXSlist[system]);
1577                 TObjString *aFXSentry=0;
1578                 while ((aFXSentry = dynamic_cast<TObjString*> (iter.Next())))
1579                 {
1580                         TString aFXSentrystr = aFXSentry->String();
1581                         TObjArray *aFXSarray = aFXSentrystr.Tokenize("#!?!#");
1582                         if (!aFXSarray || aFXSarray->GetEntries() != 2 )
1583                         {
1584                                 Log(fCurrentDetector, Form("UpdateTable - error updating %s FXS entry. Check string: <%s>",
1585                                         GetSystemName(system), aFXSentrystr.Data()));
1586                                 if(aFXSarray) delete aFXSarray;
1587                                 result = kFALSE;
1588                                 continue;
1589                         }
1590                         const char* fileId = ((TObjString*) aFXSarray->At(0))->GetName();
1591                         const char* source = ((TObjString*) aFXSarray->At(1))->GetName();
1592
1593                         TString whereClause;
1594                         if (system == kDAQ)
1595                         {
1596                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
1597                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
1598                         }
1599                         else if (system == kDCS)
1600                         {
1601                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\";",
1602                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId);
1603                         }
1604                         else if (system == kHLT)
1605                         {
1606                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\";",
1607                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
1608                         }
1609
1610                         delete aFXSarray;
1611
1612                         TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
1613                                                                 now.GetSec(), whereClause.Data());
1614
1615                         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1616
1617                         // Query execution
1618                         TSQLResult* aResult;
1619                         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
1620                         if (!aResult)
1621                         {
1622                                 Log(fCurrentDetector, Form("UpdateTable - %s db: can't execute SQL query <%s>",
1623                                                                 GetSystemName(system), sqlQuery.Data()));
1624                                 result = kFALSE;
1625                                 continue;
1626                         }
1627                         delete aResult;
1628                 }
1629         }
1630
1631         return result;
1632 }
1633
1634 //______________________________________________________________________________________________
1635 Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
1636 {
1637         //
1638         // Update Shuttle logbook filling detector or shuttle_done column
1639         // ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
1640         //
1641
1642         // check connection, in case connect
1643         if(!Connect(3)){
1644                 Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook.");
1645                 return kFALSE;
1646         }
1647
1648         TString detName(detector);
1649         TString setClause;
1650         if(detName == "shuttle_done")
1651         {
1652                 setClause = "set shuttle_done=1";
1653
1654                 // Send the information to ML
1655                 TMonaLisaText  mlStatus("SHUTTLE_status", "Done");
1656
1657                 TList mlList;
1658                 mlList.Add(&mlStatus);
1659
1660                 fMonaLisa->SendParameters(&mlList);
1661         } else {
1662                 TString statusStr(status);
1663                 if(statusStr.Contains("done", TString::kIgnoreCase) ||
1664                    statusStr.Contains("failed", TString::kIgnoreCase)){
1665                         setClause = Form("set %s=\"%s\"", detector, status);
1666                 } else {
1667                         Log("SHUTTLE",
1668                                 Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s",
1669                                         status, detector));
1670                         return kFALSE;
1671                 }
1672         }
1673
1674         TString whereClause = Form("where run=%d", GetCurrentRun());
1675
1676         TString sqlQuery = Form("update logbook_shuttle %s %s",
1677                                         setClause.Data(), whereClause.Data());
1678
1679         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1680
1681         // Query execution
1682         TSQLResult* aResult;
1683         aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
1684         if (!aResult) {
1685                 Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data()));
1686                 return kFALSE;
1687         }
1688         delete aResult;
1689
1690         return kTRUE;
1691 }
1692
1693 //______________________________________________________________________________________________
1694 Int_t AliShuttle::GetCurrentRun() const
1695 {
1696 // Get current run from logbook entry
1697
1698         return fLogbookEntry ? fLogbookEntry->GetRun() : -1;
1699 }
1700
1701 //______________________________________________________________________________________________
1702 UInt_t AliShuttle::GetCurrentStartTime() const
1703 {
1704 // get current start time
1705
1706         return fLogbookEntry ? fLogbookEntry->GetStartTime() : 0;
1707 }
1708
1709 //______________________________________________________________________________________________
1710 UInt_t AliShuttle::GetCurrentEndTime() const
1711 {
1712 // get current end time from logbook entry
1713
1714         return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
1715 }
1716
1717 //______________________________________________________________________________________________
1718 void AliShuttle::Log(const char* detector, const char* message)
1719 {
1720 // Fill log string with a message
1721
1722         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
1723         if (dir == NULL) {
1724                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE)) {
1725                         AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
1726                         return;
1727                 }
1728
1729         } else {
1730                 gSystem->FreeDirectory(dir);
1731         }
1732
1733         TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
1734         if (GetCurrentRun() >= 0) 
1735                 toLog += Form("run %d - ", GetCurrentRun());
1736         toLog += Form("%s", message);
1737
1738         AliInfo(toLog.Data());
1739
1740         TString fileName;
1741         if (GetCurrentRun() >= 0) 
1742                 fileName.Form("%s/%s_%d.log", GetShuttleLogDir(), detector, GetCurrentRun());
1743         else
1744                 fileName.Form("%s/%s.log", GetShuttleLogDir(), detector);
1745         
1746         gSystem->ExpandPathName(fileName);
1747
1748         ofstream logFile;
1749         logFile.open(fileName, ofstream::out | ofstream::app);
1750
1751         if (!logFile.is_open()) {
1752                 AliError(Form("Could not open file %s", fileName.Data()));
1753                 return;
1754         }
1755
1756         logFile << toLog.Data() << "\n";
1757
1758         logFile.close();
1759 }
1760
1761 //______________________________________________________________________________________________
1762 Bool_t AliShuttle::Collect(Int_t run)
1763 {
1764 //
1765 // Collects conditions data for all UNPROCESSED run written to DAQ LogBook in case of run = -1 (default)
1766 // If a dedicated run is given this run is processed
1767 //
1768 // In operational mode, this is the Shuttle function triggered by the EOR signal.
1769 //
1770
1771         if (run == -1)
1772                 Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs");
1773         else
1774                 Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run));
1775
1776         SetLastAction("Starting");
1777
1778         TString whereClause("where shuttle_done=0");
1779         if (run != -1)
1780                 whereClause += Form(" and run=%d", run);
1781
1782         TObjArray shuttleLogbookEntries;
1783         if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries))
1784         {
1785                 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
1786                 return kFALSE;
1787         }
1788
1789         if (shuttleLogbookEntries.GetEntries() == 0)
1790         {
1791                 if (run == -1)
1792                         Log("SHUTTLE","Collect - Found no UNPROCESSED runs in Shuttle logbook");
1793                 else
1794                         Log("SHUTTLE", Form("Collect - Run %d is already DONE "
1795                                                 "or it does not exist in Shuttle logbook", run));
1796                 return kTRUE;
1797         }
1798
1799         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1800                 fFirstUnprocessed[iDet] = kTRUE;
1801
1802         if (run != -1)
1803         {
1804                 // query Shuttle logbook for earlier runs, check if some detectors are unprocessed,
1805                 // flag them into fFirstUnprocessed array
1806                 TString whereClause(Form("where shuttle_done=0 and run < %d", run));
1807                 TObjArray tmpLogbookEntries;
1808                 if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries))
1809                 {
1810                         Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
1811                         return kFALSE;
1812                 }
1813
1814                 TIter iter(&tmpLogbookEntries);
1815                 AliShuttleLogbookEntry* anEntry = 0;
1816                 while ((anEntry = dynamic_cast<AliShuttleLogbookEntry*> (iter.Next())))
1817                 {
1818                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1819                         {
1820                                 if (anEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
1821                                 {
1822                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
1823                                                         anEntry->GetRun(), GetDetName(iDet)));
1824                                         fFirstUnprocessed[iDet] = kFALSE;
1825                                 }
1826                         }
1827
1828                 }
1829
1830         }
1831
1832         if (!RetrieveConditionsData(shuttleLogbookEntries))
1833         {
1834                 Log("SHUTTLE", "Collect - Process of at least one run failed");
1835                 return kFALSE;
1836         }
1837
1838         Log("SHUTTLE", "Collect - Requested run(s) successfully processed");
1839         return kTRUE;
1840 }
1841
1842 //______________________________________________________________________________________________
1843 Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries)
1844 {
1845 // Retrieve conditions data for all runs that aren't processed yet
1846
1847         Bool_t hasError = kFALSE;
1848
1849         TIter iter(&dateEntries);
1850         AliShuttleLogbookEntry* anEntry;
1851
1852         while ((anEntry = (AliShuttleLogbookEntry*) iter.Next())){
1853                 if (!Process(anEntry)){
1854                         hasError = kTRUE;
1855                 }
1856
1857                 // clean SHUTTLE temp directory
1858                 TString command = Form("rm -f %s/*.shuttle", GetShuttleTempDir());
1859                 gSystem->Exec(command.Data());
1860         }
1861
1862         return hasError == kFALSE;
1863 }
1864
1865 //______________________________________________________________________________________________
1866 ULong_t AliShuttle::GetTimeOfLastAction() const
1867 {
1868         ULong_t tmp;
1869
1870         fMonitoringMutex->Lock();
1871
1872         tmp = fLastActionTime;
1873
1874         fMonitoringMutex->UnLock();
1875
1876         return tmp;
1877 }
1878
1879 //______________________________________________________________________________________________
1880 const TString AliShuttle::GetLastAction() const
1881 {
1882         // returns a string description of the last action
1883
1884         TString tmp;
1885
1886         fMonitoringMutex->Lock();
1887         
1888         tmp = fLastAction;
1889         
1890         fMonitoringMutex->UnLock();
1891
1892         return tmp;
1893 }
1894
1895 //______________________________________________________________________________________________
1896 void AliShuttle::SetLastAction(const char* action)
1897 {
1898         // updates the monitoring variables
1899
1900         fMonitoringMutex->Lock();
1901
1902         fLastAction = action;
1903         fLastActionTime = time(0);
1904         
1905         fMonitoringMutex->UnLock();
1906 }
1907
1908 //______________________________________________________________________________________________
1909 const char* AliShuttle::GetRunParameter(const char* param)
1910 {
1911 // returns run parameter read from DAQ logbook
1912
1913         if(!fLogbookEntry) {
1914                 AliError("No logbook entry!");
1915                 return 0;
1916         }
1917
1918         return fLogbookEntry->GetRunParameter(param);
1919 }
1920
1921 //______________________________________________________________________________________________
1922 Bool_t AliShuttle::SendMail()
1923 {
1924 // sends a mail to the subdetector expert in case of preprocessor error
1925
1926         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
1927         if (dir == NULL)
1928         {
1929                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
1930                 {
1931                         AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
1932                         return kFALSE;
1933                 }
1934
1935         } else {
1936                 gSystem->FreeDirectory(dir);
1937         }
1938
1939         TString bodyFileName;
1940         bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
1941         gSystem->ExpandPathName(bodyFileName);
1942
1943         ofstream mailBody;
1944         mailBody.open(bodyFileName, ofstream::out);
1945
1946         if (!mailBody.is_open())
1947         {
1948                 AliError(Form("Could not open mail body file %s", bodyFileName.Data()));
1949                 return kFALSE;
1950         }
1951
1952         TString to="";
1953         TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
1954         TObjString *anExpert=0;
1955         while ((anExpert = (TObjString*) iterExperts.Next()))
1956         {
1957                 to += Form("%s,", anExpert->GetName());
1958         }
1959         to.Remove(to.Length()-1);
1960         AliDebug(2, Form("to: %s",to.Data()));
1961
1962         // TODO this will be removed...
1963         if (to.Contains("not_yet_set")) {
1964                 AliInfo("List of detector responsibles not yet set!");
1965                 return kFALSE;
1966         }
1967
1968         TString cc="alberto.colla@cern.ch";
1969
1970         TString subject = Form("%s Shuttle preprocessor error in run %d !",
1971                                 fCurrentDetector.Data(), GetCurrentRun());
1972         AliDebug(2, Form("subject: %s", subject.Data()));
1973
1974         TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
1975         body += Form("SHUTTLE just detected that your preprocessor "
1976                         "exited with ERROR state in run %d!!\n\n", GetCurrentRun());
1977         body += Form("Please check %s status on the web page asap!\n\n", fCurrentDetector.Data());
1978         body += Form("The last 10 lines of %s log file are following:\n\n");
1979
1980         AliDebug(2, Form("Body begin: %s", body.Data()));
1981
1982         mailBody << body.Data();
1983         mailBody.close();
1984         mailBody.open(bodyFileName, ofstream::out | ofstream::app);
1985
1986         TString logFileName = Form("%s/%s_%d.log", GetShuttleLogDir(), fCurrentDetector.Data(), GetCurrentRun());
1987         TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
1988         if (gSystem->Exec(tailCommand.Data()))
1989         {
1990                 mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
1991         }
1992
1993         TString endBody = Form("------------------------------------------------------\n\n");
1994         endBody += Form("In case of problems please contact the SHUTTLE core team.\n\n");
1995         endBody += "Please do not answer this message directly, it is automatically generated.\n\n";
1996         endBody += "Sincerely yours,\n\n \t\t\tthe SHUTTLE\n";
1997
1998         AliDebug(2, Form("Body end: %s", endBody.Data()));
1999
2000         mailBody << endBody.Data();
2001
2002         mailBody.close();
2003
2004         // send mail!
2005         TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
2006                                                 subject.Data(),
2007                                                 cc.Data(),
2008                                                 to.Data(),
2009                                                 bodyFileName.Data());
2010         AliDebug(2, Form("mail command: %s", mailCommand.Data()));
2011
2012         Bool_t result = gSystem->Exec(mailCommand.Data());
2013
2014         return result == 0;
2015 }