]> git.uio.no Git - u/mrichter/AliRoot.git/blob - SHUTTLE/AliShuttle.cxx
adding monalisa monitoring
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttle.cxx
1 /**************************************************************************
2  * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
3  *                                                                        *
4  * Author: The ALICE Off-line Project.                                    *
5  * Contributors are mentioned in the code where appropriate.              *
6  *                                                                        *
7  * Permission to use, copy, modify and distribute this software and its   *
8  * documentation strictly for non-commercial purposes is hereby granted   *
9  * without fee, provided that the above copyright notice appears in all   *
10  * copies and that both the copyright notice and this permission notice   *
11  * appear in the supporting documentation. The authors make no claims     *
12  * about the suitability of this software for any purpose. It is          *
13  * provided "as is" without express or implied warranty.                  *
14  **************************************************************************/
15
16 /*
17 $Log$
18 Revision 1.26  2007/01/23 19:20:03  acolla
19 Removed old ldif files, added TOF, MCH ldif files. Added some options in
20 AliShuttleConfig::Print. Added in Ali Shuttle: SetShuttleTempDir and
21 SetShuttleLogDir
22
23 Revision 1.25  2007/01/15 19:13:52  acolla
24 Moved some AliInfo to AliDebug in SendMail function
25
26 Revision 1.21  2006/12/07 08:51:26  jgrosseo
27 update (alberto):
28 table, db names in ldap configuration
29 added GRP preprocessor
30 DCS data can also be retrieved by data point
31
32 Revision 1.20  2006/11/16 16:16:48  jgrosseo
33 introducing strict run ordering flag
34 removed giving preprocessor name to preprocessor, they have to know their name themselves ;-)
35
36 Revision 1.19  2006/11/06 14:23:04  jgrosseo
37 major update (Alberto)
38 o) reading of run parameters from the logbook
39 o) online offline naming conversion
40 o) standalone DCSclient package
41
42 Revision 1.18  2006/10/20 15:22:59  jgrosseo
43 o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
44 o) Merging Collect, CollectAll, CollectNew function
45 o) Removing implementation of empty copy constructors (declaration still there!)
46
47 Revision 1.17  2006/10/05 16:20:55  jgrosseo
48 adapting to new CDB classes
49
50 Revision 1.16  2006/10/05 15:46:26  jgrosseo
51 applying to the new interface
52
53 Revision 1.15  2006/10/02 16:38:39  jgrosseo
54 update (alberto):
55 fixed memory leaks
56 storing of objects that failed to be stored to the grid before
57 interfacing of shuttle status table in daq system
58
59 Revision 1.14  2006/08/29 09:16:05  jgrosseo
60 small update
61
62 Revision 1.13  2006/08/15 10:50:00  jgrosseo
63 effc++ corrections (alberto)
64
65 Revision 1.12  2006/08/08 14:19:29  jgrosseo
66 Update to shuttle classes (Alberto)
67
68 - Possibility to set the full object's path in the Preprocessor's and
69 Shuttle's  Store functions
70 - Possibility to extend the object's run validity in the same classes
71 ("startValidity" and "validityInfinite" parameters)
72 - Implementation of the StoreReferenceData function to store reference
73 data in a dedicated CDB storage.
74
75 Revision 1.11  2006/07/21 07:37:20  jgrosseo
76 last run is stored after each run
77
78 Revision 1.10  2006/07/20 09:54:40  jgrosseo
79 introducing status management: The processing per subdetector is divided into several steps,
80 after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
81 can keep track of the number of failures and skips further processing after a certain threshold is
82 exceeded. These thresholds can be configured in LDAP.
83
84 Revision 1.9  2006/07/19 10:09:55  jgrosseo
85 new configuration, accesst to DAQ FES (Alberto)
86
87 Revision 1.8  2006/07/11 12:44:36  jgrosseo
88 adding parameters for extended validity range of data produced by preprocessor
89
90 Revision 1.7  2006/07/10 14:37:09  jgrosseo
91 small fix + todo comment
92
93 Revision 1.6  2006/07/10 13:01:41  jgrosseo
94 enhanced storing of last sucessfully processed run (alberto)
95
96 Revision 1.5  2006/07/04 14:59:57  jgrosseo
97 revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
98
99 Revision 1.4  2006/06/12 09:11:16  jgrosseo
100 coding conventions (Alberto)
101
102 Revision 1.3  2006/06/06 14:26:40  jgrosseo
103 o) removed files that were moved to STEER
104 o) shuttle updated to follow the new interface (Alberto)
105
106 Revision 1.2  2006/03/07 07:52:34  hristov
107 New version (B.Yordanov)
108
109 Revision 1.6  2005/11/19 17:19:14  byordano
110 RetrieveDATEEntries and RetrieveConditionsData added
111
112 Revision 1.5  2005/11/19 11:09:27  byordano
113 AliShuttle declaration added
114
115 Revision 1.4  2005/11/17 17:47:34  byordano
116 TList changed to TObjArray
117
118 Revision 1.3  2005/11/17 14:43:23  byordano
119 import to local CVS
120
121 Revision 1.1.1.1  2005/10/28 07:33:58  hristov
122 Initial import as subdirectory in AliRoot
123
124 Revision 1.2  2005/09/13 08:41:15  byordano
125 default startTime endTime added
126
127 Revision 1.4  2005/08/30 09:13:02  byordano
128 some docs added
129
130 Revision 1.3  2005/08/29 21:15:47  byordano
131 some docs added
132
133 */
134
//
// This class is the main manager for AliShuttle.
// It organizes the data retrieval from DCS and calls the
// interface methods of AliPreprocessor.
// For every detector in AliShuttleConfig (see AliShuttleConfig),
// data for its set of aliases is retrieved. If there is a registered
// AliPreprocessor for this detector then it will be used
// according to the schema (see AliPreprocessor).
// If there isn't a registered AliPreprocessor then the retrieved
// data is stored automatically to the underlying AliCDBStorage.
// The alias name is used for detSpec.
//
147
148 #include "AliShuttle.h"
149
150 #include "AliCDBManager.h"
151 #include "AliCDBStorage.h"
152 #include "AliCDBId.h"
153 #include "AliCDBRunRange.h"
154 #include "AliCDBPath.h"
155 #include "AliCDBEntry.h"
156 #include "AliShuttleConfig.h"
157 #include "DCSClient/AliDCSClient.h"
158 #include "AliLog.h"
159 #include "AliPreprocessor.h"
160 #include "AliShuttleStatus.h"
161 #include "AliShuttleLogbookEntry.h"
162
163 #include <TSystem.h>
164 #include <TObject.h>
165 #include <TString.h>
166 #include <TTimeStamp.h>
167 #include <TObjString.h>
168 #include <TSQLServer.h>
169 #include <TSQLResult.h>
170 #include <TSQLRow.h>
171 #include <TMutex.h>
172
173 #include <TMonaLisaWriter.h>
174
175 #include <fstream>
176
177 #include <sys/types.h>
178 #include <sys/wait.h>
179
180 ClassImp(AliShuttle)
181
182 TString AliShuttle::fgkMainCDB("alien://folder=ShuttleCDB");
183 TString AliShuttle::fgkLocalCDB("local://LocalShuttleCDB");
184 TString AliShuttle::fgkMainRefStorage("alien://folder=ShuttleReference");
185 TString AliShuttle::fgkLocalRefStorage("local://LocalReferenceStorage");
186
187 Bool_t AliShuttle::fgkProcessDCS(kTRUE); 
188
189 TString AliShuttle::fgkShuttleTempDir = gSystem->ExpandPathName("$ALICE_ROOT/SHUTTLE/temp");
190 TString AliShuttle::fgkShuttleLogDir = gSystem->ExpandPathName("$ALICE_ROOT/SHUTTLE/log");
191
192 //______________________________________________________________________________________________
193 AliShuttle::AliShuttle(const AliShuttleConfig* config,
194                 UInt_t timeout, Int_t retries):
195 fConfig(config),
196 fTimeout(timeout), fRetries(retries),
197 fPreprocessorMap(),
198 fLogbookEntry(0),
199 fCurrentDetector(),
200 fStatusEntry(0),
201 fGridError(kFALSE),
202 fMonitoringMutex(0),
203 fLastActionTime(0),
204 fLastAction(),
205 fMonaLisa(0)
206 {
207         //
208         // config: AliShuttleConfig used
209         // timeout: timeout used for AliDCSClient connection
210         // retries: the number of retries in case of connection error.
211         //
212
213         if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
214         for(int iSys=0;iSys<4;iSys++) {
215                 fServer[iSys]=0;
216                 if (iSys < 3)
217                         fFXSlist[iSys].SetOwner(kTRUE);
218         }
219         fPreprocessorMap.SetOwner(kTRUE);
220
221         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
222                 fFirstUnprocessed[iDet] = kFALSE;
223
224         fMonitoringMutex = new TMutex();
225 }
226
227 //______________________________________________________________________________________________
228 AliShuttle::~AliShuttle()
229 {
230 // destructor
231
232         fPreprocessorMap.DeleteAll();
233         for(int iSys=0;iSys<4;iSys++)
234                 if(fServer[iSys]) {
235                         fServer[iSys]->Close();
236                         delete fServer[iSys];
237                         fServer[iSys] = 0;
238                 }
239
240         if (fStatusEntry){
241                 delete fStatusEntry;
242                 fStatusEntry = 0;
243         }
244         
245         if (fMonitoringMutex) 
246         {
247                 delete fMonitoringMutex;
248                 fMonitoringMutex = 0;
249         }
250 }
251
252 //______________________________________________________________________________________________
253 void AliShuttle::RegisterPreprocessor(AliPreprocessor* preprocessor)
254 {
255         //
256         // Registers new AliPreprocessor.
257         // It uses GetName() for indentificator of the pre processor.
258         // The pre processor is registered it there isn't any other
259         // with the same identificator (GetName()).
260         //
261
262         const char* detName = preprocessor->GetName();
263         if(GetDetPos(detName) < 0)
264                 AliFatal(Form("********** !!!!! Invalid detector name: %s !!!!! **********", detName));
265
266         if (fPreprocessorMap.GetValue(detName)) {
267                 AliWarning(Form("AliPreprocessor %s is already registered!", detName));
268                 return;
269         }
270
271         fPreprocessorMap.Add(new TObjString(detName), preprocessor);
272 }
273 //______________________________________________________________________________________________
274 UInt_t AliShuttle::Store(const AliCDBPath& path, TObject* object,
275                 AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite)
276 {
277   // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for
278   // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored
279   // using this function. Use StoreReferenceData instead!
280   // It calls WriteToCDB function which perform actual storage
281
282         return WriteToCDB(fgkMainCDB, fgkLocalCDB, path, object,
283                                 metaData, validityStart, validityInfinite);
284
285 }
286
287 //______________________________________________________________________________________________
288 UInt_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData)
289 {
290   // Stores a CDB object in the storage for reference data. This objects will not be available during
291   // offline reconstrunction. Use this function for reference data only!
292   // It calls WriteToCDB function which perform actual storage
293
294         return WriteToCDB(fgkMainRefStorage, fgkLocalRefStorage, path, object, metaData);
295
296 }
297
298 //______________________________________________________________________________________________
299 UInt_t AliShuttle::WriteToCDB(const char* mainUri, const char* localUri,
300                         const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
301                         Int_t validityStart, Bool_t validityInfinite)
302 {
303   // write object into the CDB. Parameters are passed by Store and StoreReferenceData functions.
304   // The parameters are:
305   //   1) Uri of the main storage (Grid)
306   //   2) Uri of the backup storage (Local)
307   //   3) the object's path.
308   //   4) the object to be stored
309   //   5) the metaData to be associated with the object
310   //   6) the validity start run number w.r.t. the current run,
311   //      if the data is valid only for this run leave the default 0
312   //   7) specifies if the calibration data is valid for infinity (this means until updated),
313   //      typical for calibration runs, the default is kFALSE
314   //
315   // returns 0 if fail
316   //         1 if stored in main (Grid) storage
317   //         2 if stored in backup (Local) storage
318
319         const char* cdbType = (mainUri == fgkMainCDB) ? "CDB" : "Reference";
320
321         Int_t firstRun = GetCurrentRun() - validityStart;
322         if(firstRun < 0) {
323                 AliError("First valid run happens to be less than 0! Setting it to 0.");
324                 firstRun=0;
325         }
326
327         Int_t lastRun = -1;
328         if(validityInfinite) {
329                 lastRun = AliCDBRunRange::Infinity();
330         } else {
331                 lastRun = GetCurrentRun();
332         }
333
334         AliCDBId id(path, firstRun, lastRun, -1, -1);
335
336         if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){
337                 TObjString runUsed = Form("%d", GetCurrentRun());
338                 metaData->SetProperty("RunUsed(TObjString)", runUsed.Clone());
339         }
340
341         UInt_t result = 0;
342
343         if (!(AliCDBManager::Instance()->GetStorage(mainUri))) {
344                 AliError(Form("WriteToCDB - Cannot activate main %s storage", cdbType));
345         } else {
346                 result = (UInt_t) AliCDBManager::Instance()->GetStorage(mainUri)
347                                         ->Put(object, id, metaData);
348         }
349
350         if(!result) {
351
352                 Log(fCurrentDetector,
353                         Form("WriteToCDB - Problem with main %s storage. Putting <%s> into backup storage",
354                                 cdbType, path.GetPath().Data()));
355
356                 // Set Grid version to current run number, to ease retrieval later
357                 id.SetVersion(GetCurrentRun());
358
359                 result = AliCDBManager::Instance()->GetStorage(localUri)
360                                         ->Put(object, id, metaData);
361
362                 if(result) {
363                         result = 2;
364                         fGridError = kTRUE;
365                 }else{
366                         Log(fCurrentDetector, "WriteToCDB - Can't store data!");
367                 }
368         }
369
370         return result;
371
372 }
373
374 //______________________________________________________________________________________________
375 AliShuttleStatus* AliShuttle::ReadShuttleStatus()
376 {
377 // Reads the AliShuttleStatus from the CDB
378
379         if (fStatusEntry){
380                 delete fStatusEntry;
381                 fStatusEntry = 0;
382         }
383
384         fStatusEntry = AliCDBManager::Instance()->GetStorage(AliShuttle::GetLocalCDB())
385                 ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
386
387         if (!fStatusEntry) return 0;
388         fStatusEntry->SetOwner(1);
389
390         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
391         if (!status) {
392                 AliError("Invalid object stored to CDB!");
393                 return 0;
394         }
395
396         return status;
397 }
398
399 //______________________________________________________________________________________________
400 Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status)
401 {
402 // writes the status for one subdetector
403
404         if (fStatusEntry){
405                 delete fStatusEntry;
406                 fStatusEntry = 0;
407         }
408
409         Int_t run = GetCurrentRun();
410
411         AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
412
413         fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
414         fStatusEntry->SetOwner(1);
415
416         UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
417
418         if (!result) {
419                 AliError(Form("WriteShuttleStatus for %s, run %d failed", fCurrentDetector.Data(), run));
420                 return kFALSE;
421         }
422         
423         SendMLInfo();
424
425         return kTRUE;
426 }
427
428 //______________________________________________________________________________________________
429 void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount)
430 {
431   // changes the AliShuttleStatus for the given detector and run to the given status
432
433         if (!fStatusEntry){
434                 AliError("UNEXPECTED: fStatusEntry empty");
435                 return;
436         }
437
438         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
439
440         if (!status){
441                 AliError("UNEXPECTED: status could not be read from current CDB entry");
442                 return;
443         }
444
445         TString actionStr = Form("UpdateShuttleStatus - %s: Changing state from %s to %s",
446                                 fCurrentDetector.Data(),
447                                 status->GetStatusName(),
448                                 status->GetStatusName(newStatus));
449         Log("SHUTTLE", actionStr);
450         SetLastAction(actionStr);
451
452         status->SetStatus(newStatus);
453         if (increaseCount) status->IncreaseCount();
454
455         AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
456
457         SendMLInfo();
458 }
459
460 //______________________________________________________________________________________________
461 void AliShuttle::SendMLInfo()
462 {
463         //
464         // sends ML information about the current status of the current detector being processed
465         //
466         
467         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
468         
469         if (!status){
470                 AliError("UNEXPECTED: status could not be read from current CDB entry");
471                 return;
472         }
473         
474         TMonaLisaText  mlStatus(Form("%s_status", fCurrentDetector.Data()), status->GetStatusName());
475         TMonaLisaValue mlRetryCount(Form("%s_count", fCurrentDetector.Data()), status->GetCount());
476
477         TList mlList;
478         mlList.Add(&mlStatus);
479         mlList.Add(&mlRetryCount);
480
481         fMonaLisa->SendParameters(&mlList);
482 }
483
484 //______________________________________________________________________________________________
485 Bool_t AliShuttle::ContinueProcessing()
486 {
487 // this function reads the AliShuttleStatus information from CDB and
488 // checks if the processing should be continued
489 // if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
490
491         if (!fConfig->HostProcessDetector(fCurrentDetector)) return kFALSE;
492
493         AliPreprocessor* aPreprocessor =
494                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
495         if (!aPreprocessor)
496         {
497                 AliInfo(Form("%s: no preprocessor registered", fCurrentDetector.Data()));
498                 return kFALSE;
499         }
500
501         AliShuttleLogbookEntry::Status entryStatus =
502                 fLogbookEntry->GetDetectorStatus(fCurrentDetector);
503
504         if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
505                 AliInfo(Form("ContinueProcessing - %s is %s",
506                                 fCurrentDetector.Data(),
507                                 fLogbookEntry->GetDetectorStatusName(entryStatus)));
508                 return kFALSE;
509         }
510
511         // if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state
512
513         // check if current run is first unprocessed run for current detector
514         if (fConfig->StrictRunOrder(fCurrentDetector) &&
515                 !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
516         {
517                 Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering but this is not the first unprocessed run!"));
518                 return kFALSE;
519         }
520
521         AliShuttleStatus* status = ReadShuttleStatus();
522         if (!status) {
523                 // first time
524                 Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
525                                 fCurrentDetector.Data()));
526                 status = new AliShuttleStatus(AliShuttleStatus::kStarted);
527                 return WriteShuttleStatus(status);
528         }
529
530         // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
531         // If it happens it may mean Logbook updating failed... let's do it now!
532         if (status->GetStatus() == AliShuttleStatus::kDone ||
533             status->GetStatus() == AliShuttleStatus::kFailed){
534                 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
535                                         fCurrentDetector.Data(),
536                                         status->GetStatusName(status->GetStatus())));
537                 UpdateShuttleLogbook(fCurrentDetector.Data(),
538                                         status->GetStatusName(status->GetStatus()));
539                 return kFALSE;
540         }
541
542         if (status->GetStatus() == AliShuttleStatus::kStoreFailed) {
543                 Log("SHUTTLE",
544                         Form("ContinueProcessing - %s: Grid storage of one or more objects failed. Trying again now",
545                                 fCurrentDetector.Data()));
546                 if(TryToStoreAgain()){
547                         Log(fCurrentDetector.Data(), "ContinueProcessing - All objects successfully stored into OCDB");
548                         UpdateShuttleStatus(AliShuttleStatus::kDone);
549                         UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE");
550                 } else {
551                         Log("SHUTTLE",
552                                 Form("ContinueProcessing - %s: Grid storage failed again",
553                                         fCurrentDetector.Data()));
554                         // trigger ML information manually because we do not had a status change
555                         SendMLInfo();
556                 }
557                 return kFALSE;
558         }
559
560         // if we get here, there is a restart
561         Bool_t cont = kFALSE;
562
563         // abort conditions
564         if (status->GetCount() >= fConfig->GetMaxRetries()) {
565                 Log("SHUTTLE", Form("ContinueProcessing - %s failed %d times in status %s - "
566                                 "Updating Shuttle Logbook", fCurrentDetector.Data(),
567                                 status->GetCount(), status->GetStatusName()));
568                 UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
569                 UpdateShuttleStatus(AliShuttleStatus::kFailed);
570         } else {
571                 Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. "
572                                 "Aborted before with %s. Retry number %d.", fCurrentDetector.Data(),
573                                 status->GetStatusName(), status->GetCount()));
574                 UpdateShuttleStatus(AliShuttleStatus::kStarted, kTRUE);
575                 cont = kTRUE;
576         }
577
578         // Send mail to detector expert!
579         AliInfo(Form("Sending mail to %s expert...", fCurrentDetector.Data()));
580         if (!SendMail())
581                 Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert",
582                                 fCurrentDetector.Data()));
583
584         return cont;
585 }
586
587 //______________________________________________________________________________________________
588 Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
589 {
590         //
591         // Makes data retrieval for all detectors in the configuration.
592         // entry: Shuttle logbook entry, contains run paramenters and status of detectors
593         // (Unprocessed, Inactive, Failed or Done).
594         // Returns kFALSE in case of error occured and kTRUE otherwise
595         //
596
597         if(!entry) return kFALSE;
598
599         fLogbookEntry = entry;
600
601         if (fLogbookEntry->IsDone())
602         {
603                 Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook");
604                 UpdateShuttleLogbook("shuttle_done");
605                 fLogbookEntry = 0;
606                 return kTRUE;
607         }
608
609         // create ML instance that monitors this run
610         fMonaLisa = new TMonaLisaWriter(Form("%d", GetCurrentRun()), "SHUTTLE", "aliendb1.cern.ch");
611         // disable monitoring of other parameters that come e.g. from TFile
612         gMonitoringWriter = 0;
613
614         AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^* \n",
615                                         GetCurrentRun()));
616
617
618         // Send the information to ML
619         TMonaLisaText  mlStatus("SHUTTLE_status", "Processing");
620
621         TList mlList;
622         mlList.Add(&mlStatus);
623
624         fMonaLisa->SendParameters(&mlList);
625                         
626         fLogbookEntry->Print("all");
627
628         // Initialization
629         Bool_t hasError = kFALSE;
630         for(Int_t iSys=0;iSys<3;iSys++) fFXSCalled[iSys]=kFALSE;
631
632         AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
633         if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
634         AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
635         if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
636
637         // Loop on detectors in the configuration
638         TIter iter(fConfig->GetDetectors());
639         TObjString* aDetector = 0;
640
641         while ((aDetector = (TObjString*) iter.Next()))
642         {
643                 fCurrentDetector = aDetector->String();
644
645                 if (ContinueProcessing() == kFALSE) continue;
646
647                 AliInfo(Form("\n\n \t\t\t****** run %d - %s: START  ******",
648                                                 GetCurrentRun(), aDetector->GetName()));
649
650                 Log(fCurrentDetector.Data(), "Starting processing");
651
652                 Int_t pid = fork();
653
654                 if (pid < 0)
655                 {
656                         Log("SHUTTLE", "ERROR: Forking failed");
657                 }
658                 else if (pid > 0)
659                 {
660                         // parent
661                         AliInfo(Form("In parent process of %d - %s: Starting monitoring",
662                                                         GetCurrentRun(), aDetector->GetName()));
663
664                         Long_t begin = time(0);
665
666                         int status; // to be used with waitpid, on purpose an int (not Int_t)!
667                         while (waitpid(pid, &status, WNOHANG) == 0)
668                         {
669                                 Long_t expiredTime = time(0) - begin;
670
671                                 if (expiredTime > fConfig->GetPPTimeOut())
672                                 {
673                                         Log("SHUTTLE", Form("Process time out. Run time: %d seconds. Killing...",
674                                                                 expiredTime));
675
676                                         kill(pid, 9);
677
678                                         hasError = kTRUE;
679
680                                         gSystem->Sleep(1000);
681                                 }
682                                 else
683                                 {
684                                         if (expiredTime % 60 == 0)
685                                         Log("SHUTTLE", Form("Checked process. Run time: %d seconds.",
686                                                                 expiredTime));
687                                         gSystem->Sleep(1000);
688                                 }
689                         }
690
691                         AliInfo(Form("In parent process of %d - %s: Client has terminated.",
692                                                                 GetCurrentRun(), aDetector->GetName()));
693
694                         if (WIFEXITED(status))
695                         {
696                                 Int_t returnCode = WEXITSTATUS(status);
697
698                                 Log("SHUTTLE", Form("The return code is %d", returnCode));
699
700                                 if (returnCode != 0)
701                                 hasError = kTRUE;
702                         }
703                 }
704                 else if (pid == 0)
705                 {
706                         // client
707                         AliInfo(Form("In client process of %d - %s", GetCurrentRun(), aDetector->GetName()));
708
709                         UInt_t result = ProcessCurrentDetector();
710
711                         Int_t returnCode = 0; // will be set to 1 in case of an error
712
713                         if (!result)
714                         {
715                                 returnCode = 1;
716                                 AliInfo(Form("\n \t\t\t****** run %d - %s: PREPROCESSOR ERROR ****** \n\n",
717                                                         GetCurrentRun(), aDetector->GetName()));
718                         }
719                         else if (result == 2)
720                         {
721                                 AliInfo(Form("\n \t\t\t****** run %d - %s: STORAGE ERROR ****** \n\n",
722                                                         GetCurrentRun(), aDetector->GetName()));
723                         } else
724                         {
725                                 AliInfo(Form("\n \t\t\t****** run %d - %s: DONE ****** \n\n",
726                                                         GetCurrentRun(), aDetector->GetName()));
727                         }
728
729                         if (result > 0)
730                         {
731                                 // Process successful: Update time_processed field in FXS logbooks!
732                                 if (fFXSCalled[kDAQ])
733                                 {
734                                         if (UpdateDAQTable() == kFALSE)
735                                         returnCode = 1;
736                                         fFXSlist[kDAQ].Clear();
737                                 }
738                                 //if(fFXSCalled[kDCS]) {
739                                 //  if (UpdateDCSTable(aDetector->GetName()) == kFALSE)
740                                 //    returnCode = 1;
741                                 //  fFXSlist[kDCS].Clear();
742                                 //}
743                                 if (fFXSCalled[kHLT])
744                                 {
745                                         if (UpdateHLTTable() == kFALSE)
746                                         returnCode = 1;
747                                         fFXSlist[kHLT].Clear();
748                                 }
749                         }
750
751                         AliInfo(Form("Client process of %d - %s is exiting now with %d.",
752                                                         GetCurrentRun(), aDetector->GetName(), returnCode));
753
754                         // the client exits here
755                         gSystem->Exit(returnCode);
756
757                         AliError("We should never get here!!!");
758                 }
759         }
760
761         AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^* \n",
762                                                         GetCurrentRun()));
763
764         //check if shuttle is done for this run, if so update logbook
765         TObjArray checkEntryArray;
766         checkEntryArray.SetOwner(1);
767         TString whereClause = Form("where run=%d", GetCurrentRun());
768         if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || checkEntryArray.GetEntries() == 0) {
769                 Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!",
770                                                 GetCurrentRun()));
771                 return hasError == kFALSE;
772         }
773
774         AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*>
775                                                 (checkEntryArray.At(0));
776
777         if (checkEntry)
778         {
779                 if (checkEntry->IsDone())
780                 {
781                         Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook");
782                         UpdateShuttleLogbook("shuttle_done");
783                 }
784                 else
785                 {
786                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
787                         {
788                                 if (checkEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
789                                 {
790                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
791                                                         checkEntry->GetRun(), GetDetName(iDet)));
792                                         fFirstUnprocessed[iDet] = kFALSE;
793                                 }
794                         }
795                 }
796         }
797
798         // remove ML instance
799         delete fMonaLisa;
800         fMonaLisa = 0;
801
802         fLogbookEntry = 0;
803
804         return hasError == kFALSE;
805 }
806
807 //______________________________________________________________________________________________
808 UInt_t AliShuttle::ProcessCurrentDetector()
809 {
810         //
811         // Makes data retrieval just for a specific detector (fCurrentDetector).
812         // Threre should be a configuration for this detector.
813
814         AliInfo(Form("Retrieving values for %s, run %d", fCurrentDetector.Data(), GetCurrentRun()));
815
816         UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
817
818         TMap dcsMap;
819         dcsMap.SetOwner(1);
820
821         Bool_t aDCSError = kFALSE;
822         fGridError = kFALSE;
823
824         // TODO Test only... I've added a flag that allows to
825         // exclude DCS archive DB query
826         if (!fgkProcessDCS)
827         {
828                 AliInfo("Skipping DCS processing!");
829                 aDCSError = kFALSE;
830         } else {
831                 TString host(fConfig->GetDCSHost(fCurrentDetector));
832                 Int_t port = fConfig->GetDCSPort(fCurrentDetector);
833
834                 // Retrieval of Aliases
835                 TObjString* anAlias = 0;
836                 Int_t iAlias = 1;
837                 Int_t nTotAliases= ((TMap*)fConfig->GetDCSAliases(fCurrentDetector))->GetEntries();
838                 TIter iterAliases(fConfig->GetDCSAliases(fCurrentDetector));
839                 while ((anAlias = (TObjString*) iterAliases.Next()))
840                 {
841                         TObjArray *valueSet = new TObjArray();
842                         valueSet->SetOwner(1);
843
844                         if (((iAlias-1) % 500) == 0 || iAlias == nTotAliases)
845                                 AliInfo(Form("Querying DCS archive: alias %s (%d of %d)",
846                                                 anAlias->GetName(), iAlias++, nTotAliases));
847                         aDCSError = (GetValueSet(host, port, anAlias->String(), valueSet, kAlias) == 0);
848
849                         if(!aDCSError)
850                         {
851                                 dcsMap.Add(anAlias->Clone(), valueSet);
852                         } else {
853                                 Log(fCurrentDetector,
854                                         Form("ProcessCurrentDetector - Error while retrieving alias %s",
855                                                 anAlias->GetName()));
856                                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
857                                 dcsMap.DeleteAll();
858                                 return 0;
859                         }
860                 }
861
862                 // Retrieval of Data Points
863                 TObjString* aDP = 0;
864                 Int_t iDP = 0;
865                 Int_t nTotDPs= ((TMap*)fConfig->GetDCSDataPoints(fCurrentDetector))->GetEntries();
866                 TIter iterDP(fConfig->GetDCSDataPoints(fCurrentDetector));
867                 while ((aDP = (TObjString*) iterDP.Next()))
868                 {
869                         TObjArray *valueSet = new TObjArray();
870                         valueSet->SetOwner(1);
871                         if (((iDP-1) % 500) == 0 || iDP == nTotDPs)
872                                 AliInfo(Form("Querying DCS archive: DP %s (%d of %d)",
873                                                 aDP->GetName(), iDP++, nTotDPs));
874                         aDCSError = (GetValueSet(host, port, aDP->String(), valueSet, kDP) == 0);
875
876                         if(!aDCSError)
877                         {
878                                 dcsMap.Add(aDP->Clone(), valueSet);
879                         } else {
880                                 Log(fCurrentDetector,
881                                         Form("ProcessCurrentDetector - Error while retrieving data point %s",
882                                                 aDP->GetName()));
883                                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
884                                 dcsMap.DeleteAll();
885                                 return 0;
886                         }
887                 }
888         }
889
890         // DCS Archive DB processing successful. Call Preprocessor!
891         UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
892
893         AliPreprocessor* aPreprocessor =
894                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
895
896         aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
897         UInt_t aPPResult = aPreprocessor->Process(&dcsMap);
898
899         UInt_t returnValue = 0;
900         if (aPPResult == 0) { // Preprocessor error
901                 UpdateShuttleStatus(AliShuttleStatus::kPPError);
902                 returnValue = 0;
903         } else if (fGridError == kFALSE) { // process and Grid storage ok!
904                 UpdateShuttleStatus(AliShuttleStatus::kDone);
905                 UpdateShuttleLogbook(fCurrentDetector, "DONE");
906                 Log(fCurrentDetector.Data(),
907                         "ProcessCurrentDetector - Preprocessor and Grid storage ended successfully");
908                 returnValue = 1;
909         } else { // Grid storage error (process ok, but object put in local storage)
910                 UpdateShuttleStatus(AliShuttleStatus::kStoreFailed);
911                 returnValue = 2;
912         }
913
914         dcsMap.DeleteAll();
915
916         return returnValue;
917 }
918
919 //______________________________________________________________________________________________
920 Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
921                 TObjArray& entries)
922 {
923 // Query DAQ's Shuttle logbook and fills detector status object.
924 // Call QueryRunParameters to query DAQ logbook for run parameters.
925
926         entries.SetOwner(1);
927
928         // check connection, in case connect
929         if(!Connect(3)) return kFALSE;
930
931         TString sqlQuery;
932         sqlQuery = Form("select * from logbook_shuttle %s order by run", whereClause);
933
934         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
935         if (!aResult) {
936                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
937                 return kFALSE;
938         }
939
940         AliDebug(2,Form("Query = %s", sqlQuery.Data()));
941
942         if(aResult->GetRowCount() == 0) {
943 //              if(sqlQuery.EndsWith("where shuttle_done=0 order by run")){
944 //                      Log("SHUTTLE", "QueryShuttleLogbook - All runs in Shuttle Logbook are already DONE");
945 //                      delete aResult;
946 //                      return kTRUE;
947 //              } else {
948                         AliInfo("No entries in Shuttle Logbook match request");
949                         delete aResult;
950                         return kTRUE;
951 //              }
952         }
953
954         // TODO Check field count!
955         const UInt_t nCols = 22;
956         if (aResult->GetFieldCount() != (Int_t) nCols) {
957                 AliError("Invalid SQL result field number!");
958                 delete aResult;
959                 return kFALSE;
960         }
961
962         TSQLRow* aRow;
963         while ((aRow = aResult->Next())) {
964                 TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
965                 Int_t run = runString.Atoi();
966
967                 AliShuttleLogbookEntry *entry = QueryRunParameters(run);
968                 if (!entry)
969                         continue;
970
971                 // loop on detectors
972                 for(UInt_t ii = 0; ii < nCols; ii++)
973                         entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii));
974
975                 entries.AddLast(entry);
976                 delete aRow;
977         }
978
979 //      if(sqlQuery.EndsWith("where shuttle_done=0 order by run"))
980 //              Log("SHUTTLE", Form("QueryShuttleLogbook - Found %d unprocessed runs in Shuttle Logbook",
981 //                                                      entries.GetEntriesFast()));
982         delete aResult;
983         return kTRUE;
984 }
985
986 //______________________________________________________________________________________________
987 AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
988 {
989         //
990         // Retrieve run parameters written in the DAQ logbook and sets them into AliShuttleLogbookEntry object
991         //
992
993         // check connection, in case connect
994         if (!Connect(3))
995                 return 0;
996
997         TString sqlQuery;
998         sqlQuery.Form("select * from %s where run=%d", fConfig->GetDAQlbTable(), run);
999
1000         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
1001         if (!aResult) {
1002                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
1003                 return 0;
1004         }
1005
1006         if (aResult->GetRowCount() == 0) {
1007                 Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run));
1008                 delete aResult;
1009                 return 0;
1010         }
1011
1012         if (aResult->GetRowCount() > 1) {
1013                 AliError(Form("More than one entry in DAQ Logbook for run %d. Skipping", run));
1014                 delete aResult;
1015                 return 0;
1016         }
1017
1018         TSQLRow* aRow = aResult->Next();
1019         if (!aRow)
1020         {
1021                 AliError(Form("Could not retrieve row for run %d. Skipping", run));
1022                 delete aResult;
1023                 return 0;
1024         }
1025
1026         AliShuttleLogbookEntry* entry = new AliShuttleLogbookEntry(run);
1027
1028         for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
1029                 entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
1030
1031         UInt_t startTime = entry->GetStartTime();
1032         UInt_t endTime = entry->GetEndTime();
1033
1034         if (!startTime || !endTime || startTime > endTime) {
1035                 Log("SHUTTLE",
1036                         Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d",
1037                                 run, startTime, endTime));
1038                 delete entry;
1039                 delete aRow;
1040                 delete aResult;
1041                 return 0;
1042         }
1043
1044         delete aRow;
1045         delete aResult;
1046
1047         return entry;
1048 }
1049
1050 //______________________________________________________________________________________________
1051 Bool_t AliShuttle::TryToStoreAgain()
1052 {
1053   // Called in case the detector failed to store the object in Grid OCDB
1054   // It tries to store the object again, if it does not find more recent and overlapping objects
1055   // Calls underlying TryToStoreAgain(const char*) function twice, for OCDB and Reference storage.
1056
1057         AliInfo("Trying to store OCDB data again...");
1058         Bool_t resultCDB = TryToStoreAgain(fgkMainCDB);
1059
1060         AliInfo("Trying to store reference data again...");
1061         Bool_t resultRef = TryToStoreAgain(fgkMainRefStorage);
1062
1063         return resultCDB && resultRef;
1064 }
1065
1066 //______________________________________________________________________________________________
1067 Bool_t AliShuttle::TryToStoreAgain(TString& gridURI)
1068 {
1069   // Called by TryToStoreAgain(), performs actual storage retry
1070
1071         TObjArray* gridIds=0;
1072
1073         Bool_t result = kTRUE;
1074
1075         const char* type = 0;
1076         TString backupURI;
1077         if(gridURI == fgkMainCDB) {
1078                 type = "OCDB";
1079                 backupURI = fgkLocalCDB;
1080         } else if(gridURI == fgkMainRefStorage) {
1081                 type = "reference";
1082                 backupURI = fgkLocalRefStorage;
1083         } else {
1084                 AliError(Form("Invalid storage URI: %s", gridURI.Data()));
1085                 return kFALSE;
1086         }
1087
1088         AliCDBManager* man = AliCDBManager::Instance();
1089
1090         AliCDBStorage *gridSto = man->GetStorage(gridURI);
1091         if(!gridSto) {
1092                 Log(fCurrentDetector.Data(),
1093                         Form("TryToStoreAgain - cannot activate main %s storage", type));
1094                 return kFALSE;
1095         }
1096
1097         gridIds = gridSto->GetQueryCDBList();
1098
1099         // get objects previously stored in local CDB
1100         AliCDBStorage *backupSto = man->GetStorage(backupURI);
1101         AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*");
1102         // Local objects were stored with current run as Grid version!
1103         TList* localEntries = backupSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun());
1104         localEntries->SetOwner(1);
1105
1106         // loop on local stored objects
1107         TIter localIter(localEntries);
1108         AliCDBEntry *aLocEntry = 0;
1109         while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){
1110                 aLocEntry->SetOwner(1);
1111                 AliCDBId aLocId = aLocEntry->GetId();
1112                 aLocEntry->SetVersion(-1);
1113                 aLocEntry->SetSubVersion(-1);
1114
1115                 // loop on Grid valid Id's
1116                 Bool_t store = kTRUE;
1117                 TIter gridIter(gridIds);
1118                 AliCDBId* aGridId = 0;
1119                 while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
1120                         // If local object is valid up to infinity we store it only if it is
1121                         // the first unprocessed run!
1122                         if (aLocId.GetLastRun() == AliCDBRunRange::Infinity())
1123                         {
1124                                 if (!fFirstUnprocessed[GetDetPos(fCurrentDetector)])
1125                                 {
1126                                         Log(fCurrentDetector.Data(),
1127                                                 ("TryToStoreAgain - This object has validity infinite but "
1128                                                  "there are previous unprocessed runs!"));
1129                                         continue;
1130                                 } else {
1131                                         break;
1132                                 }
1133                         }
1134                         if(aGridId->GetPath() != aLocId.GetPath()) continue;
1135                         // skip all objects valid up to infinity
1136                         if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
1137                         // if we get here, it means there's already some more recent object stored on Grid!
1138                         store = kFALSE;
1139                         break;
1140                 }
1141
1142                 if(!store){
1143                         Log(fCurrentDetector.Data(),
1144                                 Form("TryToStoreAgain - A more recent object already exists in %s storage: <%s>",
1145                                         type, aGridId->ToString().Data()));
1146                         // removing local filename...
1147                         // TODO maybe it's better not to remove it, it was not copied to the Grid!
1148                         TString filename;
1149                         backupSto->IdToFilename(aLocId, filename);
1150                         AliInfo(Form("Removing local file %s", filename.Data()));
1151                         gSystem->Exec(Form("rm %s",filename.Data()));
1152                         continue;
1153                 }
1154
1155                 // If we get here, the file can be stored!
1156                 Bool_t storeOk = gridSto->Put(aLocEntry);
1157                 if(storeOk){
1158                         Log(fCurrentDetector.Data(),
1159                                 Form("TryToStoreAgain - Object <%s> successfully put into %s storage",
1160                                         aLocId.ToString().Data(), type));
1161
1162                         // removing local filename...
1163                         TString filename;
1164                         backupSto->IdToFilename(aLocId, filename);
1165                         AliInfo(Form("Removing local file %s", filename.Data()));
1166                         gSystem->Exec(Form("rm %s", filename.Data()));
1167                         continue;
1168                 } else  {
1169                         Log(fCurrentDetector.Data(),
1170                                 Form("TryToStoreAgain - Grid %s storage of object <%s> failed again",
1171                                         type, aLocId.ToString().Data()));
1172                         result = kFALSE;
1173                 }
1174         }
1175         localEntries->Clear();
1176
1177         return result;
1178 }
1179
1180 //______________________________________________________________________________________________
1181 Bool_t AliShuttle::GetValueSet(const char* host, Int_t port, const char* entry,
1182                                 TObjArray* valueSet, DCSType type)
1183 {
1184 // Retrieve all "entry" data points from the DCS server
1185 // host, port: TSocket connection parameters
1186 // entry: name of the alias or data point
1187 // valueSet: array of retrieved AliDCSValue's
1188 // type: kAlias or kDP
1189
1190         AliDCSClient client(host, port, fTimeout, fRetries);
1191         if (!client.IsConnected())
1192         {
1193                 return kFALSE;
1194         }
1195
1196         Int_t result=0;
1197
1198         if (type == kAlias)
1199         {
1200                 result = client.GetAliasValues(entry,
1201                         GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1202         } else
1203         if (type == kDP)
1204         {
1205                 result = client.GetDPValues(entry,
1206                         GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1207         }
1208
1209         if (result < 0)
1210         {
1211                 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get '%s'! Reason: %s",
1212                         entry, AliDCSClient::GetErrorString(result)));
1213
1214                 if (result == AliDCSClient::fgkServerError)
1215                 {
1216                         Log(fCurrentDetector.Data(), Form("GetValueSet - Server error: %s",
1217                                 client.GetServerError().Data()));
1218                 }
1219
1220                 return kFALSE;
1221         }
1222
1223         return kTRUE;
1224 }
1225
1226 //______________________________________________________________________________________________
1227 const char* AliShuttle::GetFile(Int_t system, const char* detector,
1228                 const char* id, const char* source)
1229 {
1230 // Get calibration file from file exchange servers
1231 // calls specific getter according to system index (kDAQ, kDCS, kHLT)
1232
1233         switch(system){
1234                 case kDAQ:
1235                         return GetDAQFileName(detector, id, source);
1236                         break;
1237                 case kDCS:
1238                         return GetDCSFileName(detector, id, source);
1239                         break;
1240                 case kHLT:
1241                         return GetHLTFileName(detector, id, source);
1242                         break;
1243                 default:
1244                         AliError(Form("No valid system index: %d",system));
1245         }
1246
1247         return 0;
1248 }
1249
1250 //______________________________________________________________________________________________
1251 TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
1252 {
1253 // Get sources producing the condition file Id from file exchange servers
1254 // calls specific getter according to system index (kDAQ, kDCS, kHLT)
1255
1256         switch(system){
1257                 case kDAQ:
1258                         return GetDAQFileSources(detector, id);
1259                         break;
1260                 case kDCS:
1261                         return GetDCSFileSources(detector, id);
1262                         break;
1263                 case kHLT:
1264                         return GetHLTFileSources(detector, id);
1265                         break;
1266                 default:
1267                         AliError(Form("No valid system index: %d",system));
1268         }
1269
1270         return NULL;
1271 }
1272
1273 //______________________________________________________________________________________________
1274 Bool_t AliShuttle::Connect(Int_t system)
1275 {
1276 // Connect to MySQL Server of the system's FXS MySQL databases
1277 // DAQ Logbook, Shuttle Logbook and DAQ FXS db are on the same host
1278
1279         // check connection: if already connected return
1280         if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
1281
1282         TString dbHost, dbUser, dbPass, dbName;
1283
1284         if (system < 3) // FXS db servers
1285         {
1286                 dbHost = Form("mysql://%s:%d", fConfig->GetFXSdbHost(system), fConfig->GetFXSdbPort(system));
1287                 dbUser = fConfig->GetFXSdbUser(system);
1288                 dbPass = fConfig->GetFXSdbPass(system);
1289                 dbName =   fConfig->GetFXSdbName(system);
1290         } else { // Run & Shuttle logbook servers
1291         // TODO Will the Shuttle logbook server be the same as the Run logbook server ???
1292                 dbHost = Form("mysql://%s:%d", fConfig->GetDAQlbHost(), fConfig->GetDAQlbPort());
1293                 dbUser = fConfig->GetDAQlbUser();
1294                 dbPass = fConfig->GetDAQlbPass();
1295                 dbName =   fConfig->GetDAQlbDB();
1296         }
1297
1298         fServer[system] = TSQLServer::Connect(dbHost.Data(), dbUser.Data(), dbPass.Data());
1299         if (!fServer[system] || !fServer[system]->IsConnected()) {
1300                 if(system < 3)
1301                 {
1302                 AliError(Form("Can't establish connection to FXS database for %s",
1303                                         AliShuttleInterface::GetSystemName(system)));
1304                 } else {
1305                 AliError("Can't establish connection to Run logbook.");
1306                 }
1307                 if(fServer[system]) delete fServer[system];
1308                 return kFALSE;
1309         }
1310
1311         // Get tables
1312         // TODO in the configuration should the table name be there too?
1313         TSQLResult* aResult=0;
1314         switch(system){
1315                 case kDAQ:
1316                         aResult = fServer[kDAQ]->GetTables(dbName.Data());
1317                         break;
1318                 case kDCS:
1319                         //aResult = fServer[kDCS]->GetTables(dbName.Data());
1320                         break;
1321                 case kHLT:
1322                         aResult = fServer[kHLT]->GetTables(dbName.Data());
1323                         break;
1324                 default:
1325                         aResult = fServer[3]->GetTables(dbName.Data());
1326                         break;
1327         }
1328
1329         delete aResult;
1330         return kTRUE;
1331 }
1332
1333 //______________________________________________________________________________________________
1334 const char* AliShuttle::GetDAQFileName(const char* detector, const char* id, const char* source)
1335 {
1336 // Retrieves a file from the DAQ FXS.
1337 // First queris the DAQ FXS database for the DAQ file name, using the run, detector, id and source info
1338 // then calls RetrieveDAQFile(DAQfilename) for actual copy to local disk
1339 // run: current run being processed (given by Logbook entry fLogbookEntry)
1340 // detector: the Preprocessor name
1341 // id: provided as a parameter by the Preprocessor
1342 // source: provided by the Preprocessor through GetFileSources function
1343
1344         // check connection, in case connect
1345         if (!Connect(kDAQ))
1346         {
1347                 Log(detector, "GetDAQFileName - Couldn't connect to DAQ FXS database");
1348                 return 0;
1349         }
1350
1351         // Query preparation
1352         TString sqlQueryStart = Form("select filePath from %s where", fConfig->GetFXSdbTable(kDAQ));
1353         TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\"",
1354                                 GetCurrentRun(), detector, id, source);
1355         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1356
1357         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1358
1359         // Query execution
1360         TSQLResult* aResult = 0;
1361         aResult = dynamic_cast<TSQLResult*> (fServer[kDAQ]->Query(sqlQuery));
1362         if (!aResult) {
1363                 Log(detector, Form("GetDAQFileName - Can't execute SQL query for: id = %s, source = %s",
1364                                 id, source));
1365                 return 0;
1366         }
1367
1368         if(aResult->GetRowCount() == 0)
1369         {
1370                 Log(detector,
1371                         Form("GetDAQFileName - No entry in FXS table for: id = %s, source = %s",
1372                                 id, source));
1373                 delete aResult;
1374                 return 0;
1375         }
1376
1377         if (aResult->GetRowCount() > 1) {
1378                 Log(detector,
1379                         Form("GetDAQFileName - More than one entry in FXS table for: id = %s, source = %s",
1380                                 id, source));
1381                 delete aResult;
1382                 return 0;
1383         }
1384
1385         TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
1386
1387         if (!aRow){
1388                 Log(detector, Form("GetDAQFileName - Empty set result from query: id = %s, source = %s",
1389                                 id, source));
1390                 delete aResult;
1391                 return 0;
1392         }
1393
1394         TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
1395
1396         delete aResult;
1397         delete aRow;
1398
1399         AliDebug(2, Form("filePath = %s",filePath.Data()));
1400
1401         // retrieved file is renamed to make it unique
1402         TString localFileName = Form("DAQ_%s_%d_%s_%s.shuttle",
1403                                         detector, GetCurrentRun(), id, source);
1404
1405         // file retrieval from DAQ FXS
1406         Bool_t result = RetrieveDAQFile(filePath.Data(), localFileName.Data());
1407         if(!result) {
1408                 Log(detector, Form("GetDAQFileName - Copy of file %s from DAQ FXS failed", filePath.Data()));
1409                 return 0;
1410         } else {
1411                 AliInfo(Form("File %s copied from DAQ FXS into %s/%s",
1412                         filePath.Data(), GetShuttleTempDir(), localFileName.Data()));
1413         }
1414
1415         fFXSCalled[kDAQ]=kTRUE;
1416         TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, source));
1417         fFXSlist[kDAQ].Add(fileParams);
1418
1419         static TString fullLocalFileName;
1420         fullLocalFileName = TString::Format("%s/%s", GetShuttleTempDir(), localFileName.Data());
1421
1422         AliInfo(Form("fullLocalFileName = %s", fullLocalFileName.Data()));
1423
1424         return fullLocalFileName.Data();
1425
1426 }
1427
1428 //______________________________________________________________________________________________
1429 Bool_t AliShuttle::RetrieveDAQFile(const char* daqFileName, const char* localFileName)
1430 {
1431 // Copies file from DAQ FXS to local Shuttle machine
1432
1433         // check temp directory: trying to cd to temp; if it does not exist, create it
1434         AliDebug(2, Form("Copy file %s from DAQ FXS into %s/%s",
1435                         daqFileName, GetShuttleTempDir(), localFileName));
1436
1437         void* dir = gSystem->OpenDirectory(GetShuttleTempDir());
1438         if (dir == NULL) {
1439                 if (gSystem->mkdir(GetShuttleTempDir(), kTRUE)) {
1440                         AliError(Form("Can't open directory <%s>", GetShuttleTempDir()));
1441                         return kFALSE;
1442                 }
1443
1444         } else {
1445                 gSystem->FreeDirectory(dir);
1446         }
1447
1448         TString baseDAQFXSFolder = "FES";
1449         TString command = Form("scp -oPort=%d -2 %s@%s:%s/%s %s/%s",
1450                 fConfig->GetFXSPort(kDAQ),
1451                 fConfig->GetFXSUser(kDAQ),
1452                 fConfig->GetFXSHost(kDAQ),
1453                 baseDAQFXSFolder.Data(),
1454                 daqFileName,
1455                 GetShuttleTempDir(),
1456                 localFileName);
1457
1458         AliDebug(2, Form("%s",command.Data()));
1459
1460         UInt_t nRetries = 0;
1461         UInt_t maxRetries = 3;
1462
1463         // copy!! if successful TSystem::Exec returns 0
1464         while(nRetries++ < maxRetries) {
1465                 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
1466                 if(gSystem->Exec(command.Data()) == 0) return kTRUE;
1467         }
1468
1469         return kFALSE;
1470
1471 }
1472
1473 //______________________________________________________________________________________________
1474 TList* AliShuttle::GetDAQFileSources(const char* detector, const char* id)
1475 {
1476 // Retrieves list of DAQ sources of file Id
1477
1478         // check connection, in case connect
1479         if(!Connect(kDAQ)){
1480                 Log(detector, "GetDAQFileSources - Couldn't connect to DAQ FXS database");
1481                 return 0;
1482         }
1483
1484         // Query preparation
1485         TString sqlQueryStart = Form("select DAQsource from %s where", fConfig->GetFXSdbTable(kDAQ));
1486         TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1487                                 GetCurrentRun(), detector, id);
1488         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1489
1490         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1491
1492         // Query execution
1493         TSQLResult* aResult;
1494         aResult = fServer[kDAQ]->Query(sqlQuery);
1495         if (!aResult) {
1496                 Log(detector, Form("GetDAQFileSources - Can't execute SQL query for id: %s", id));
1497                 return 0;
1498         }
1499
1500         if (aResult->GetRowCount() == 0) {
1501                 Log(detector,
1502                         Form("GetDAQFileSources - No entry in FXS table for id: %s", id));
1503                 delete aResult;
1504                 return 0;
1505         }
1506
1507         TSQLRow* aRow;
1508         TList *list = new TList();
1509         list->SetOwner(1);
1510
1511         while((aRow = aResult->Next())){
1512
1513                 TString daqSource(aRow->GetField(0), aRow->GetFieldLength(0));
1514                 AliDebug(2, Form("daqSource = %s", daqSource.Data()));
1515                 list->Add(new TObjString(daqSource));
1516                 delete aRow;
1517         }
1518         delete aResult;
1519
1520         return list;
1521
1522 }
1523
1524 //______________________________________________________________________________________________
1525 const char* AliShuttle::GetDCSFileName(const char* /*detector*/, const char* /*id*/, const char* /*source*/){
1526 // Retrieves a file from the DCS FXS.
1527
1528 return "You're in DCS";
1529
1530 }
1531
1532 //______________________________________________________________________________________________
1533 TList* AliShuttle::GetDCSFileSources(const char* /*detector*/, const char* /*id*/){
1534 // Retrieves file sources from the DCS FXS.
1535
1536 return NULL;
1537
1538 }
1539
1540 //______________________________________________________________________________________________
1541 const char* AliShuttle::GetHLTFileName(const char* detector, const char* id, const char* source){
1542 // Retrieves a file from the HLT FXS.
1543 // First queris the HLT FXS database for the HLT file name, using the run, detector, id and source info
1544 // then calls RetrieveDAQFile(DAQfilename) for actual copy to local disk
1545 // run: current run being processed (given by Logbook entry fLogbookEntry)
1546 // detector: the Preprocessor name
1547 // id: provided as a parameter by the Preprocessor
1548 // source: provided by the Preprocessor through GetFileSources function
1549
1550         // check connection, in case connect
1551         if (!Connect(kHLT))
1552         {
1553                 Log(detector, "GetHLTFileName - Couldn't connect to HLT FXS database");
1554                 return 0;
1555         }
1556
1557         // Query preparation
1558         TString sqlQueryStart = Form("select filePath,fileSize,fileChecksum from %s where",
1559                                                                                 fConfig->GetFXSdbTable(kHLT));
1560         TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\"",
1561                                 GetCurrentRun(), detector, id, source);
1562         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1563
1564         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1565
1566         // Query execution
1567         TSQLResult* aResult = 0;
1568         aResult = dynamic_cast<TSQLResult*> (fServer[kHLT]->Query(sqlQuery));
1569         if (!aResult) {
1570                 Log(detector, Form("GetHLTFileName - Can't execute SQL query for: id = %s, source = %s",
1571                                 id, source));
1572                 return 0;
1573         }
1574
1575         if(aResult->GetRowCount() == 0)
1576         {
1577                 Log(detector,
1578                         Form("GetHLTFileName - No entry in FXS table for: id = %s, source = %s",
1579                                 id, source));
1580                 delete aResult;
1581                 return 0;
1582         }
1583
1584         if (aResult->GetRowCount() > 1) {
1585                 Log(detector,
1586                         Form("GetHLTFileName - More than one entry in FXS table for: id = %s, source = %s",
1587                                 id, source));
1588                 delete aResult;
1589                 return 0;
1590         }
1591
1592         if (aResult->GetFieldCount() != 3) {
1593                 Log(detector,
1594                         Form("GetHLTFileName - Wrong field count in FXS table for: id = %s, source = %s",
1595                                 id, source));
1596                 delete aResult;
1597                 return 0;
1598         }
1599
1600         TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
1601
1602         if (!aRow){
1603                 Log(detector, Form("GetHLTFileName - Empty set result from query: id = %s, source = %s",
1604                                 id, source));
1605                 delete aResult;
1606                 return 0;
1607         }
1608
1609         TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
1610         TString fileSize(aRow->GetField(1), aRow->GetFieldLength(1));
1611         TString fileMd5Sum(aRow->GetField(2), aRow->GetFieldLength(2));
1612
1613         delete aResult;
1614         delete aRow;
1615
1616         AliDebug(2, Form("filePath = %s",filePath.Data()));
1617
1618         // The full file path in HLT FXS is runNb/DET/DDLnumber/filePath
1619 //      TString fullFilePath = Form("%d/%s/%s/%s", GetCurrentRun(), detector, source, filePath.Data());
1620
1621         // retrieved file is renamed to make it unique
1622         TString localFileName = Form("HLT_%s_%d_%s_%s.shuttle",
1623                                         detector, GetCurrentRun(), id, source);
1624
1625         // file retrieval from HLT FXS
1626         Bool_t result = RetrieveHLTFile(filePath.Data(), localFileName.Data());
1627         if(!result)
1628         {
1629                 Log(detector, Form("GetHLTFileName - Copy of file %s from HLT FXS failed", filePath.Data()));
1630                 return 0;
1631         } else {
1632                 AliInfo(Form("File %s copied from HLT FXS into %s/%s",
1633                         filePath.Data(), GetShuttleTempDir(), localFileName.Data()));
1634         }
1635
1636         // compare md5sum of local file with the one stored in the HLT DB
1637         Int_t md5Comp = gSystem->Exec(Form("md5sum %s/%s |grep %s 2>&1 > /dev/null",
1638                                                 GetShuttleTempDir(), localFileName.Data(), fileMd5Sum.Data()));
1639
1640         if (md5Comp != 0)
1641         {
1642                 Log(detector, Form("GetHLTFileName - md5sum of file %s does not match with local copy!", filePath.Data()));
1643                 return 0;
1644         }
1645
1646         fFXSCalled[kHLT]=kTRUE;
1647         TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, source));
1648         fFXSlist[kHLT].Add(fileParams);
1649
1650         static TString fullLocalFileName;
1651         fullLocalFileName = TString::Format("%s/%s", GetShuttleTempDir(), localFileName.Data());
1652
1653         AliInfo(Form("fullLocalFileName = %s", fullLocalFileName.Data()));
1654
1655         return fullLocalFileName.Data();
1656
1657 }
1658
1659 //______________________________________________________________________________________________
1660 Bool_t AliShuttle::RetrieveHLTFile(const char* hltFileName, const char* localFileName)
1661 {
1662 // Copies file from HLT FXS to local Shuttle machine
1663
1664         // check temp directory: trying to cd to temp; if it does not exist, create it
1665         AliDebug(2, Form("Copy file %s from HLT FXS into %s/%s",
1666                         hltFileName, GetShuttleTempDir(), localFileName));
1667
1668         void* dir = gSystem->OpenDirectory(GetShuttleTempDir());
1669         if (dir == NULL) {
1670                 if (gSystem->mkdir(GetShuttleTempDir(), kTRUE)) {
1671                         AliError(Form("Can't open directory <%s>", GetShuttleTempDir()));
1672                         return kFALSE;
1673                 }
1674
1675         } else {
1676                 gSystem->FreeDirectory(dir);
1677         }
1678
1679         TString baseHLTFXSFolder = "~";
1680         TString command = Form("scp -oPort=%d %s@%s:%s/%s %s/%s",
1681                 fConfig->GetFXSPort(kHLT),
1682                 fConfig->GetFXSUser(kHLT),
1683                 fConfig->GetFXSHost(kHLT),
1684                 baseHLTFXSFolder.Data(),
1685                 hltFileName,
1686                 GetShuttleTempDir(),
1687                 localFileName);
1688
1689         AliDebug(2, Form("%s",command.Data()));
1690
1691         UInt_t nRetries = 0;
1692         UInt_t maxRetries = 3;
1693
1694         // copy!! if successful TSystem::Exec returns 0
1695         while(nRetries++ < maxRetries) {
1696                 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
1697                 if(gSystem->Exec(command.Data()) == 0) return kTRUE;
1698         }
1699
1700         return kFALSE;
1701
1702 }
1703
1704 //______________________________________________________________________________________________
1705 TList* AliShuttle::GetHLTFileSources(const char* detector, const char* id){
1706 // Retrieves list of HLT sources (DDLnumbers) of file Id
1707
1708         // check connection, in case connect
1709         if(!Connect(kHLT)){
1710                 Log(detector, "GetHLTFileSources - Couldn't connect to HLT FXS database");
1711                 return 0;
1712         }
1713
1714         // Query preparation
1715         TString sqlQueryStart = Form("select DDLnumbers from %s where", fConfig->GetFXSdbTable(kHLT));
1716         TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1717                                 GetCurrentRun(), detector, id);
1718         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1719
1720         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1721
1722         // Query execution
1723         TSQLResult* aResult;
1724         aResult = fServer[kHLT]->Query(sqlQuery);
1725         if (!aResult) {
1726                 Log(detector, Form("GetHLTFileSources - Can't execute SQL query for id: %s", id));
1727                 return 0;
1728         }
1729
1730         if (aResult->GetRowCount() == 0) {
1731                 Log(detector,
1732                         Form("GetHLTFileSources - No entry in FXS table for id: %s", id));
1733                 delete aResult;
1734                 return 0;
1735         }
1736
1737         TSQLRow* aRow;
1738         TList *list = new TList();
1739         list->SetOwner(1);
1740
1741         while((aRow = aResult->Next())){
1742
1743                 TString ddlNumbers(aRow->GetField(0), aRow->GetFieldLength(0));
1744                 AliDebug(2, Form("DDLnumbers = %s", ddlNumbers.Data()));
1745                 list->Add(new TObjString(ddlNumbers));
1746                 delete aRow;
1747         }
1748         delete aResult;
1749
1750         return list;
1751
1752 }
1753
1754 //______________________________________________________________________________________________
1755 Bool_t AliShuttle::UpdateDAQTable()
1756 {
1757 // Update DAQ table filling time_processed field in all rows corresponding to current run and detector
1758
1759         // check connection, in case connect
1760         if(!Connect(kDAQ)){
1761                 Log(fCurrentDetector, "UpdateDAQTable - Couldn't connect to DAQ FXS database");
1762                 return kFALSE;
1763         }
1764
1765         TTimeStamp now; // now
1766
1767         // Loop on FXS list entries
1768         TIter iter(&fFXSlist[kDAQ]);
1769         TObjString *aFXSentry=0;
1770         while((aFXSentry = dynamic_cast<TObjString*> (iter.Next()))){
1771                 TString aFXSentrystr = aFXSentry->String();
1772                 TObjArray *aFXSarray = aFXSentrystr.Tokenize("#!?!#");
1773                 if(!aFXSarray || aFXSarray->GetEntries() != 2 ) {
1774                         Log(fCurrentDetector, Form("UpdateDAQTable - error updating FXS entry. Check string: <%s>",
1775                                 aFXSentrystr.Data()));
1776                         if(aFXSarray) delete aFXSarray;
1777                         return kFALSE;
1778                 }
1779                 const char* fileId = ((TObjString*) aFXSarray->At(0))->GetName();
1780                 const char* daqSource = ((TObjString*) aFXSarray->At(1))->GetName();
1781                 TString whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
1782                         GetCurrentRun(), fCurrentDetector.Data(), fileId, daqSource);
1783
1784                 delete aFXSarray;
1785
1786                 TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(kDAQ),
1787                                                         now.GetSec(), whereClause.Data());
1788
1789                 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1790
1791                 // Query execution
1792                 TSQLResult* aResult;
1793                 aResult = dynamic_cast<TSQLResult*> (fServer[kDAQ]->Query(sqlQuery));
1794                 if (!aResult) {
1795                         Log(fCurrentDetector, Form("UpdateDAQTable - Can't execute SQL query <%s>", sqlQuery.Data()));
1796                         return kFALSE;
1797                 }
1798                 delete aResult;
1799         }
1800
1801         return kTRUE;
1802 }
1803
1804 //______________________________________________________________________________________________
1805 Bool_t AliShuttle::UpdateHLTTable()
1806 {
1807 // Update HLT table filling time_processed field in all rows corresponding to current run and detector
1808
1809         // check connection, in case connect
1810         if(!Connect(kHLT)){
1811                 Log(fCurrentDetector, "UpdateHLTTable - Couldn't connect to HLT FXS database");
1812                 return kFALSE;
1813         }
1814
1815         TTimeStamp now; // now
1816
1817         // Loop on FXS list entries
1818         TIter iter(&fFXSlist[kHLT]);
1819         TObjString *aFXSentry=0;
1820         while((aFXSentry = dynamic_cast<TObjString*> (iter.Next()))){
1821                 TString aFXSentrystr = aFXSentry->String();
1822                 TObjArray *aFXSarray = aFXSentrystr.Tokenize("#!?!#");
1823                 if(!aFXSarray || aFXSarray->GetEntries() != 2 ) {
1824                         Log(fCurrentDetector, Form("UpdateHLTTable - error updating FXS entry. Check string: <%s>",
1825                                 aFXSentrystr.Data()));
1826                         if(aFXSarray) delete aFXSarray;
1827                         return kFALSE;
1828                 }
1829                 const char* fileId = ((TObjString*) aFXSarray->At(0))->GetName();
1830                 const char* hltSource = ((TObjString*) aFXSarray->At(1))->GetName();
1831                 TString whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\";",
1832                         GetCurrentRun(), fCurrentDetector.Data(), fileId, hltSource);
1833
1834                 delete aFXSarray;
1835
1836                 TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(kHLT),
1837                                                         now.GetSec(), whereClause.Data());
1838
1839                 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1840
1841                 // Query execution
1842                 TSQLResult* aResult;
1843                 aResult = dynamic_cast<TSQLResult*> (fServer[kHLT]->Query(sqlQuery));
1844                 if (!aResult) {
1845                         Log(fCurrentDetector, Form("UpdateHLTTable - Can't execute SQL query <%s>", sqlQuery.Data()));
1846                         return kFALSE;
1847                 }
1848                 delete aResult;
1849         }
1850
1851         return kTRUE;
1852 }
1853
1854 //______________________________________________________________________________________________
1855 Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
1856 {
1857         //
1858         // Update Shuttle logbook filling detector or shuttle_done column
1859         // ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
1860         //
1861
1862         // check connection, in case connect
1863         if(!Connect(3)){
1864                 Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook.");
1865                 return kFALSE;
1866         }
1867
1868         TString detName(detector);
1869         TString setClause;
1870         if(detName == "shuttle_done")
1871         {
1872                 setClause = "set shuttle_done=1";
1873
1874                 // Send the information to ML
1875                 TMonaLisaText  mlStatus("SHUTTLE_status", "Done");
1876
1877                 TList mlList;
1878                 mlList.Add(&mlStatus);
1879
1880                 fMonaLisa->SendParameters(&mlList);
1881         } else {
1882                 TString statusStr(status);
1883                 if(statusStr.Contains("done", TString::kIgnoreCase) ||
1884                    statusStr.Contains("failed", TString::kIgnoreCase)){
1885                         setClause = Form("set %s=\"%s\"", detector, status);
1886                 } else {
1887                         Log("SHUTTLE",
1888                                 Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s",
1889                                         status, detector));
1890                         return kFALSE;
1891                 }
1892         }
1893
1894         TString whereClause = Form("where run=%d", GetCurrentRun());
1895
1896         TString sqlQuery = Form("update logbook_shuttle %s %s",
1897                                         setClause.Data(), whereClause.Data());
1898
1899         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1900
1901         // Query execution
1902         TSQLResult* aResult;
1903         aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
1904         if (!aResult) {
1905                 Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data()));
1906                 return kFALSE;
1907         }
1908         delete aResult;
1909
1910         return kTRUE;
1911 }
1912
1913 //______________________________________________________________________________________________
1914 Int_t AliShuttle::GetCurrentRun() const
1915 {
1916 // Get current run from logbook entry
1917
1918         return fLogbookEntry ? fLogbookEntry->GetRun() : -1;
1919 }
1920
1921 //______________________________________________________________________________________________
1922 UInt_t AliShuttle::GetCurrentStartTime() const
1923 {
1924 // get current start time
1925
1926         return fLogbookEntry ? fLogbookEntry->GetStartTime() : 0;
1927 }
1928
1929 //______________________________________________________________________________________________
1930 UInt_t AliShuttle::GetCurrentEndTime() const
1931 {
1932 // get current end time from logbook entry
1933
1934         return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
1935 }
1936
1937 //______________________________________________________________________________________________
1938 void AliShuttle::Log(const char* detector, const char* message)
1939 {
1940 // Fill log string with a message
1941
1942         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
1943         if (dir == NULL) {
1944                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE)) {
1945                         AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
1946                         return;
1947                 }
1948
1949         } else {
1950                 gSystem->FreeDirectory(dir);
1951         }
1952
1953         TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
1954         if (GetCurrentRun() >= 0) 
1955                 toLog += Form("run %d - ", GetCurrentRun());
1956         toLog += Form("%s", message);
1957
1958         AliInfo(toLog.Data());
1959
1960         TString fileName;
1961         if (GetCurrentRun() >= 0) 
1962                 fileName.Form("%s/%s_%d.log", GetShuttleLogDir(), detector, GetCurrentRun());
1963         else
1964                 fileName.Form("%s/%s.log", GetShuttleLogDir(), detector);
1965         
1966         gSystem->ExpandPathName(fileName);
1967
1968         ofstream logFile;
1969         logFile.open(fileName, ofstream::out | ofstream::app);
1970
1971         if (!logFile.is_open()) {
1972                 AliError(Form("Could not open file %s", fileName.Data()));
1973                 return;
1974         }
1975
1976         logFile << toLog.Data() << "\n";
1977
1978         logFile.close();
1979 }
1980
1981 //______________________________________________________________________________________________
1982 Bool_t AliShuttle::Collect(Int_t run)
1983 {
1984 //
1985 // Collects conditions data for all UNPROCESSED run written to DAQ LogBook in case of run = -1 (default)
1986 // If a dedicated run is given this run is processed
1987 //
1988 // In operational mode, this is the Shuttle function triggered by the EOR signal.
1989 //
1990
1991         if (run == -1)
1992                 Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs");
1993         else
1994                 Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run));
1995
1996         SetLastAction("Starting");
1997
1998         TString whereClause("where shuttle_done=0");
1999         if (run != -1)
2000                 whereClause += Form(" and run=%d", run);
2001
2002         TObjArray shuttleLogbookEntries;
2003         if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries))
2004         {
2005                 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
2006                 return kFALSE;
2007         }
2008
2009         if (shuttleLogbookEntries.GetEntries() == 0)
2010         {
2011                 if (run == -1)
2012                         Log("SHUTTLE","Collect - Found no UNPROCESSED runs in Shuttle logbook");
2013                 else
2014                         Log("SHUTTLE", Form("Collect - Run %d is already DONE "
2015                                                 "or it does not exist in Shuttle logbook", run));
2016                 return kTRUE;
2017         }
2018
2019         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
2020                 fFirstUnprocessed[iDet] = kTRUE;
2021
2022         if (run != -1)
2023         {
2024                 // query Shuttle logbook for earlier runs, check if some detectors are unprocessed,
2025                 // flag them into fFirstUnprocessed array
2026                 TString whereClause(Form("where shuttle_done=0 and run < %d", run));
2027                 TObjArray tmpLogbookEntries;
2028                 if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries))
2029                 {
2030                         Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
2031                         return kFALSE;
2032                 }
2033
2034                 TIter iter(&tmpLogbookEntries);
2035                 AliShuttleLogbookEntry* anEntry = 0;
2036                 while ((anEntry = dynamic_cast<AliShuttleLogbookEntry*> (iter.Next())))
2037                 {
2038                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
2039                         {
2040                                 if (anEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
2041                                 {
2042                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
2043                                                         anEntry->GetRun(), GetDetName(iDet)));
2044                                         fFirstUnprocessed[iDet] = kFALSE;
2045                                 }
2046                         }
2047
2048                 }
2049
2050         }
2051
2052         if (!RetrieveConditionsData(shuttleLogbookEntries))
2053         {
2054                 Log("SHUTTLE", "Collect - Process of at least one run failed");
2055                 return kFALSE;
2056         }
2057
2058         Log("SHUTTLE", "Collect - Requested run(s) successfully processed");
2059         return kTRUE;
2060 }
2061
2062 //______________________________________________________________________________________________
2063 Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries)
2064 {
2065 // Retrieve conditions data for all runs that aren't processed yet
2066
2067         Bool_t hasError = kFALSE;
2068
2069         TIter iter(&dateEntries);
2070         AliShuttleLogbookEntry* anEntry;
2071
2072         while ((anEntry = (AliShuttleLogbookEntry*) iter.Next())){
2073                 if (!Process(anEntry)){
2074                         hasError = kTRUE;
2075                 }
2076         }
2077
2078         return hasError == kFALSE;
2079 }
2080
2081 //______________________________________________________________________________________________
2082 ULong_t AliShuttle::GetTimeOfLastAction() const
2083 {
2084         ULong_t tmp;
2085
2086         fMonitoringMutex->Lock();
2087
2088         tmp = fLastActionTime;
2089
2090         fMonitoringMutex->UnLock();
2091
2092         return tmp;
2093 }
2094
2095 //______________________________________________________________________________________________
2096 const TString AliShuttle::GetLastAction() const
2097 {
2098         // returns a string description of the last action
2099
2100         TString tmp;
2101
2102         fMonitoringMutex->Lock();
2103         
2104         tmp = fLastAction;
2105         
2106         fMonitoringMutex->UnLock();
2107
2108         return tmp;
2109 }
2110
2111 //______________________________________________________________________________________________
2112 void AliShuttle::SetLastAction(const char* action)
2113 {
2114         // updates the monitoring variables
2115
2116         fMonitoringMutex->Lock();
2117
2118         fLastAction = action;
2119         fLastActionTime = time(0);
2120         
2121         fMonitoringMutex->UnLock();
2122 }
2123
2124 //______________________________________________________________________________________________
2125 const char* AliShuttle::GetRunParameter(const char* param)
2126 {
2127 // returns run parameter read from DAQ logbook
2128
2129         if(!fLogbookEntry) {
2130                 AliError("No logbook entry!");
2131                 return 0;
2132         }
2133
2134         return fLogbookEntry->GetRunParameter(param);
2135 }
2136
2137 //______________________________________________________________________________________________
2138 Bool_t AliShuttle::SendMail()
2139 {
2140 // sends a mail to the subdetector expert in case of preprocessor error
2141
2142         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
2143         if (dir == NULL)
2144         {
2145                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
2146                 {
2147                         AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
2148                         return kFALSE;
2149                 }
2150
2151         } else {
2152                 gSystem->FreeDirectory(dir);
2153         }
2154
2155         TString bodyFileName;
2156         bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
2157         gSystem->ExpandPathName(bodyFileName);
2158
2159         ofstream mailBody;
2160         mailBody.open(bodyFileName, ofstream::out);
2161
2162         if (!mailBody.is_open())
2163         {
2164                 AliError(Form("Could not open mail body file %s", bodyFileName.Data()));
2165                 return kFALSE;
2166         }
2167
2168         TString to="";
2169         TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
2170         TObjString *anExpert=0;
2171         while ((anExpert = (TObjString*) iterExperts.Next()))
2172         {
2173                 to += Form("%s,", anExpert->GetName());
2174         }
2175         to.Remove(to.Length()-1);
2176         AliDebug(2, Form("to: %s",to.Data()));
2177
2178         // TODO this will be removed...
2179         if (to.Contains("not_yet_set")) {
2180                 AliInfo("List of detector responsibles not yet set!");
2181                 return kFALSE;
2182         }
2183
2184         TString cc="alberto.colla@cern.ch";
2185
2186         TString subject = Form("%s Shuttle preprocessor error in run %d !",
2187                                 fCurrentDetector.Data(), GetCurrentRun());
2188         AliDebug(2, Form("subject: %s", subject.Data()));
2189
2190         TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
2191         body += Form("SHUTTLE just detected that your preprocessor "
2192                         "exited with ERROR state in run %d!!\n\n", GetCurrentRun());
2193         body += Form("Please check %s status on the web page asap!\n\n", fCurrentDetector.Data());
2194         body += Form("The last 10 lines of %s log file are following:\n\n");
2195
2196         AliDebug(2, Form("Body begin: %s", body.Data()));
2197
2198         mailBody << body.Data();
2199         mailBody.close();
2200         mailBody.open(bodyFileName, ofstream::out | ofstream::app);
2201
2202         TString logFileName = Form("%s/%s.log", GetShuttleLogDir(), fCurrentDetector.Data());
2203         TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
2204         if (gSystem->Exec(tailCommand.Data()))
2205         {
2206                 mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
2207         }
2208
2209         TString endBody = Form("------------------------------------------------------\n\n");
2210         endBody += Form("In case of problems please contact the SHUTTLE core team.\n\n");
2211         endBody += "Please do not answer this message directly, it is automatically generated.\n\n";
2212         endBody += "Sincerely yours,\n\n \t\t\tthe SHUTTLE\n";
2213
2214         AliDebug(2, Form("Body end: %s", endBody.Data()));
2215
2216         mailBody << endBody.Data();
2217
2218         mailBody.close();
2219
2220         // send mail!
2221         TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
2222                                                 subject.Data(),
2223                                                 cc.Data(),
2224                                                 to.Data(),
2225                                                 bodyFileName.Data());
2226         AliDebug(2, Form("mail command: %s", mailCommand.Data()));
2227
2228         Bool_t result = gSystem->Exec(mailCommand.Data());
2229
2230         return result == 0;
2231 }
2232
2233 //______________________________________________________________________________________________
2234 void AliShuttle::SetShuttleTempDir(const char* tmpDir)
2235 {
2236 // sets Shuttle temp directory
2237
2238         fgkShuttleTempDir = gSystem->ExpandPathName(tmpDir);
2239 }
2240
2241 //______________________________________________________________________________________________
2242 void AliShuttle::SetShuttleLogDir(const char* logDir)
2243 {
2244 // sets Shuttle log directory
2245
2246         fgkShuttleLogDir = gSystem->ExpandPathName(logDir);
2247 }