o) Adding time out to the execution of the preprocessors: The Shuttle forks and the...
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttle.cxx
1 /**************************************************************************
2  * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
3  *                                                                        *
4  * Author: The ALICE Off-line Project.                                    *
5  * Contributors are mentioned in the code where appropriate.              *
6  *                                                                        *
7  * Permission to use, copy, modify and distribute this software and its   *
8  * documentation strictly for non-commercial purposes is hereby granted   *
9  * without fee, provided that the above copyright notice appears in all   *
10  * copies and that both the copyright notice and this permission notice   *
11  * appear in the supporting documentation. The authors make no claims     *
12  * about the suitability of this software for any purpose. It is          *
13  * provided "as is" without express or implied warranty.                  *
14  **************************************************************************/
15
16 /*
17 $Log$
18 Revision 1.17  2006/10/05 16:20:55  jgrosseo
19 adapting to new CDB classes
20
21 Revision 1.16  2006/10/05 15:46:26  jgrosseo
22 applying to the new interface
23
24 Revision 1.15  2006/10/02 16:38:39  jgrosseo
25 update (alberto):
26 fixed memory leaks
27 storing of objects that failed to be stored to the grid before
28 interfacing of shuttle status table in daq system
29
30 Revision 1.14  2006/08/29 09:16:05  jgrosseo
31 small update
32
33 Revision 1.13  2006/08/15 10:50:00  jgrosseo
34 effc++ corrections (alberto)
35
36 Revision 1.12  2006/08/08 14:19:29  jgrosseo
37 Update to shuttle classes (Alberto)
38
39 - Possibility to set the full object's path in the Preprocessor's and
40 Shuttle's  Store functions
41 - Possibility to extend the object's run validity in the same classes
42 ("startValidity" and "validityInfinite" parameters)
43 - Implementation of the StoreReferenceData function to store reference
44 data in a dedicated CDB storage.
45
46 Revision 1.11  2006/07/21 07:37:20  jgrosseo
47 last run is stored after each run
48
49 Revision 1.10  2006/07/20 09:54:40  jgrosseo
50 introducing status management: The processing per subdetector is divided into several steps,
51 after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
52 can keep track of the number of failures and skips further processing after a certain threshold is
53 exceeded. These thresholds can be configured in LDAP.
54
55 Revision 1.9  2006/07/19 10:09:55  jgrosseo
56 new configuration, accesst to DAQ FES (Alberto)
57
58 Revision 1.8  2006/07/11 12:44:36  jgrosseo
59 adding parameters for extended validity range of data produced by preprocessor
60
61 Revision 1.7  2006/07/10 14:37:09  jgrosseo
62 small fix + todo comment
63
64 Revision 1.6  2006/07/10 13:01:41  jgrosseo
65 enhanced storing of last sucessfully processed run (alberto)
66
67 Revision 1.5  2006/07/04 14:59:57  jgrosseo
68 revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
69
70 Revision 1.4  2006/06/12 09:11:16  jgrosseo
71 coding conventions (Alberto)
72
73 Revision 1.3  2006/06/06 14:26:40  jgrosseo
74 o) removed files that were moved to STEER
75 o) shuttle updated to follow the new interface (Alberto)
76
77 Revision 1.2  2006/03/07 07:52:34  hristov
78 New version (B.Yordanov)
79
80 Revision 1.6  2005/11/19 17:19:14  byordano
81 RetrieveDATEEntries and RetrieveConditionsData added
82
83 Revision 1.5  2005/11/19 11:09:27  byordano
84 AliShuttle declaration added
85
86 Revision 1.4  2005/11/17 17:47:34  byordano
87 TList changed to TObjArray
88
89 Revision 1.3  2005/11/17 14:43:23  byordano
90 import to local CVS
91
92 Revision 1.1.1.1  2005/10/28 07:33:58  hristov
93 Initial import as subdirectory in AliRoot
94
95 Revision 1.2  2005/09/13 08:41:15  byordano
96 default startTime endTime added
97
98 Revision 1.4  2005/08/30 09:13:02  byordano
99 some docs added
100
101 Revision 1.3  2005/08/29 21:15:47  byordano
102 some docs added
103
104 */
105
//
// This class is the main manager for AliShuttle.
// It organizes the data retrieval from DCS and calls the
// interface methods of AliPreprocessor.
// For every detector in the configuration (see AliShuttleConfig),
// the data for its set of aliases is retrieved. If an AliPreprocessor
// is registered for this detector, it is used
// according to the schema (see AliPreprocessor).
// If no AliPreprocessor is registered, the retrieved
// data is stored automatically in the underlying AliCDBStorage.
// The alias name is used as detSpec.
//
118
#include "AliShuttle.h"

#include "AliCDBManager.h"
#include "AliCDBStorage.h"
#include "AliCDBId.h"
#include "AliCDBRunRange.h"
#include "AliCDBPath.h"
#include "AliCDBEntry.h"
#include "AliShuttleConfig.h"
#include "AliDCSClient.h"
#include "AliLog.h"
#include "AliPreprocessor.h"
#include "AliShuttleStatus.h"
#include "AliShuttleLogbookEntry.h"

#include <TSystem.h>
#include <TObject.h>
#include <TString.h>
#include <TTimeStamp.h>
#include <TObjString.h>
#include <TSQLServer.h>
#include <TSQLResult.h>
#include <TSQLRow.h>
#include <TMutex.h>

#include <fstream>

#include <signal.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>
148
149 ClassImp(AliShuttle)
150
151 TString AliShuttle::fgkMainCDB("alien://folder=ShuttleCDB");
152 TString AliShuttle::fgkLocalCDB("local://LocalShuttleCDB");
153 TString AliShuttle::fgkMainRefStorage("alien://folder=ShuttleReference");
154 TString AliShuttle::fgkLocalRefStorage("local://LocalReferenceStorage");
155
156 Bool_t AliShuttle::fgkProcessDCS(kTRUE); 
157
158
159 const char* AliShuttle::fgkShuttleTempDir = gSystem->ExpandPathName("$ALICE_ROOT/SHUTTLE/temp");
160 const char* AliShuttle::fgkShuttleLogDir = gSystem->ExpandPathName("$ALICE_ROOT/SHUTTLE/log");
161
162 const char* AliShuttle::fgkDetectorName[AliShuttle::kNDetectors] = {"SPD", "SDD", "SSD", "TPC", "TRD", "TOF",
163         "PHOS", "CPV", "RICH", "EMCAL", "MUON_TRK", "MUON_TRG", "FMD", "ZDC", "PMD", "START", "VZERO"};
164
165 const char* AliShuttle::fgkDetectorCode[AliShuttle::kNDetectors] = {"SPD", "SDD", "SSD", "TPC", "TRD", "TOF",
166         "PHS", "CPV", "HMP", "EMC", "MCH", "MTR", "FMD", "ZDC", "PMD", "T00", "V00"};
167
168 //______________________________________________________________________________________________
169 AliShuttle::AliShuttle(const AliShuttleConfig* config,
170                 UInt_t timeout, Int_t retries):
171 fConfig(config),
172 fTimeout(timeout), fRetries(retries),
173 fPreprocessorMap(),
174 fLogbookEntry(0),
175 fCurrentDetector(""),
176 fStatusEntry(0),
177 fGridError(kFALSE),
178 fMonitoringMutex(0),
179 fLastActionTime(0)
180 {
181         //
182         // config: AliShuttleConfig used
183         // timeout: timeout used for AliDCSClient connection
184         // retries: the number of retries in case of connection error.
185         //
186
187         if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
188         for(int iSys=0;iSys<3;iSys++) {
189                 fServer[iSys]=0;
190                 fFESlist[iSys].SetOwner(kTRUE);
191         }
192         fPreprocessorMap.SetOwner(kTRUE);
193         
194         fMonitoringMutex = new TMutex();
195 }
196
197 //______________________________________________________________________________________________
198 AliShuttle::~AliShuttle()
199 {
200 // destructor
201
202         fPreprocessorMap.DeleteAll();
203         for(int iSys=0;iSys<3;iSys++)
204                 if(fServer[iSys]) {
205                         fServer[iSys]->Close();
206                         delete fServer[iSys];
207       fServer[iSys] = 0;
208                 }
209
210         if (fStatusEntry){
211                 delete fStatusEntry;
212                 fStatusEntry = 0;
213         }
214         
215         if (fMonitoringMutex) 
216         {
217                 delete fMonitoringMutex;
218                 fMonitoringMutex = 0;
219         }
220 }
221
222 //______________________________________________________________________________________________
223 void AliShuttle::RegisterPreprocessor(AliPreprocessor* preprocessor)
224 {
225         //
226         // Registers new AliPreprocessor.
227         // It uses GetName() for indentificator of the pre processor.
228         // The pre processor is registered it there isn't any other
229         // with the same identificator (GetName()).
230         //
231
232         if (fPreprocessorMap.GetValue(preprocessor->GetName())) {
233                 AliWarning(Form("AliPreprocessor %s is already registered!",
234                         preprocessor->GetName()));
235                 return;
236         }
237
238         fPreprocessorMap.Add(new TObjString(preprocessor->GetName()), preprocessor);
239 }
240 //______________________________________________________________________________________________
241 UInt_t AliShuttle::Store(const AliCDBPath& path, TObject* object,
242                 AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite)
243 {
244   // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for
245   // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored
246   // using this function. Use StoreReferenceData instead!
247   // It calls WriteToCDB function which perform actual storage
248
249         return WriteToCDB(fgkMainCDB, fgkLocalCDB, path, object,
250                                 metaData, validityStart, validityInfinite);
251
252 }
253
254 //______________________________________________________________________________________________
255 UInt_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData)
256 {
257   // Stores a CDB object in the storage for reference data. This objects will not be available during
258   // offline reconstrunction. Use this function for reference data only!
259   // It calls WriteToCDB function which perform actual storage
260
261         return WriteToCDB(fgkMainRefStorage, fgkLocalRefStorage, path, object, metaData);
262
263 }
264
265 //______________________________________________________________________________________________
266 UInt_t AliShuttle::WriteToCDB(const char* mainUri, const char* localUri,
267                         const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
268                         Int_t validityStart, Bool_t validityInfinite)
269 {
270   // write object into the CDB. Parameters are passed by Store and StoreReferenceData functions.
271   // The parameters are:
272   //   1) Uri of the main storage (Grid)
273   //   2) Uri of the backup storage (Local)
274   //   3) the object's path.
275   //   4) the object to be stored
276   //   5) the metaData to be associated with the object
277   //   6) the validity start run number w.r.t. the current run,
278   //      if the data is valid only for this run leave the default 0
279   //   7) specifies if the calibration data is valid for infinity (this means until updated),
280   //      typical for calibration runs, the default is kFALSE
281   //
282   // returns 0 if fail
283   //         1 if stored in main (Grid) storage
284   //         2 if stored in backup (Local) storage
285
286         const char* cdbType = (mainUri == fgkMainCDB) ? "CDB" : "Reference";
287
288         Int_t firstRun = GetCurrentRun() - validityStart;
289         if(firstRun < 0) {
290                 AliError("First valid run happens to be less than 0! Setting it to 0.");
291                 firstRun=0;
292         }
293
294         Int_t lastRun = -1;
295         if(validityInfinite) {
296                 lastRun = AliCDBRunRange::Infinity();
297         } else {
298                 lastRun = GetCurrentRun();
299         }
300
301         AliCDBId id(path, firstRun, lastRun, -1, -1);
302
303         if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){
304                 TObjString runUsed = Form("%d", GetCurrentRun());
305                 metaData->SetProperty("RunUsed(TObjString)",&runUsed);
306         }
307
308         UInt_t result = 0;
309
310         if (!(AliCDBManager::Instance()->GetStorage(mainUri))) {
311                 AliError(Form("WriteToCDB - Cannot activate main %s storage", cdbType));
312         } else {
313                 result = (UInt_t) AliCDBManager::Instance()->GetStorage(mainUri)
314                                         ->Put(object, id, metaData);
315         }
316
317         if(!result) {
318
319                 Log(fCurrentDetector,
320                         Form("WriteToCDB - Problem with main %s storage. Putting <%s> into backup storage",
321                                 cdbType, path.GetPath().Data()));
322
323                 // Set Grid version to current run number, to ease retrieval later
324                 id.SetVersion(GetCurrentRun());
325
326                 result = AliCDBManager::Instance()->GetStorage(localUri)
327                                         ->Put(object, id, metaData);
328
329                 if(result) {
330                         result = 2;
331                         fGridError = kTRUE;
332                 }else{
333                         Log(fCurrentDetector, "WriteToCDB - Can't store data!");
334                 }
335         }
336
337         return result;
338
339 }
340
341 //______________________________________________________________________________________________
342 AliShuttleStatus* AliShuttle::ReadShuttleStatus()
343 {
344 // Reads the AliShuttleStatus from the CDB
345
346         if (fStatusEntry){
347                 delete fStatusEntry;
348                 fStatusEntry = 0;
349         }
350
351         fStatusEntry = AliCDBManager::Instance()->GetStorage(AliShuttle::GetLocalCDB())
352                 ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
353
354         if (!fStatusEntry) return 0;
355         fStatusEntry->SetOwner(1);
356
357         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
358         if (!status) {
359                 AliError("Invalid object stored to CDB!");
360                 return 0;
361         }
362
363         return status;
364 }
365
366 //______________________________________________________________________________________________
367 Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status)
368 {
369 // writes the status for one subdetector
370
371         if (fStatusEntry){
372                 delete fStatusEntry;
373                 fStatusEntry = 0;
374         }
375
376         Int_t run = GetCurrentRun();
377
378         AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
379
380         fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
381         fStatusEntry->SetOwner(1);
382
383         UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
384
385         if (!result) {
386                 AliError(Form("WriteShuttleStatus for %s, run %d failed", fCurrentDetector.Data(), run));
387                 return kFALSE;
388         }
389
390         return kTRUE;
391 }
392
393 //______________________________________________________________________________________________
394 void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount)
395 {
396   // changes the AliShuttleStatus for the given detector and run to the given status
397
398         if (!fStatusEntry){
399                 AliError("UNEXPECTED: fStatusEntry empty");
400                 return;
401         }
402
403         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
404
405         if (!status){
406                 AliError("UNEXPECTED: status could not be read from current CDB entry");
407                 return;
408         }
409
410         TString actionStr;
411         actionStr.Form("UpdateShuttleStatus - %s: Changing state from %s to %s", fCurrentDetector.Data(),
412                                 status->GetStatusName(), status->GetStatusName(newStatus));
413         Log("SHUTTLE", actionStr);
414         SetLastAction(actionStr);
415
416         status->SetStatus(newStatus);
417         if (increaseCount) status->IncreaseCount();
418
419         AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
420 }
421 //______________________________________________________________________________________________
422 Bool_t AliShuttle::ContinueProcessing()
423 {
424 // this function reads the AliShuttleStatus information from CDB and
425 // checks if the processing should be continued
426 // if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
427
428         if(!GetDetCode(fCurrentDetector)) {
429                 Log("SHUTTLE", Form("ContinueProcessing - %s: unknown detector",
430                                 fCurrentDetector.Data()));
431                 return kFALSE;
432         }
433
434         AliShuttleLogbookEntry::Status entryStatus =
435                 fLogbookEntry->GetDetectorStatus(GetDetCode(fCurrentDetector));
436
437         if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
438                 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s",
439                                 fCurrentDetector.Data(),
440                                 fLogbookEntry->GetDetectorStatusName(entryStatus)));
441                 return kFALSE;
442         }
443
444         // if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state
445         AliShuttleStatus* status = ReadShuttleStatus();
446         if (!status) {
447                 // first time
448                 Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
449                                 fCurrentDetector.Data()));
450                 status = new AliShuttleStatus(AliShuttleStatus::kStarted);
451                 return WriteShuttleStatus(status);
452         }
453
454         // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
455         // If it happens it may mean Logbook updating failed... let's do it now!
456         if (status->GetStatus() == AliShuttleStatus::kDone ||
457             status->GetStatus() == AliShuttleStatus::kFailed){
458                 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
459                                         fCurrentDetector.Data(),
460                                         status->GetStatusName(status->GetStatus())));
461                 UpdateShuttleLogbook(fCurrentDetector.Data(),
462                                         status->GetStatusName(status->GetStatus()));
463                 return kFALSE;
464         }
465
466         if (status->GetStatus() == AliShuttleStatus::kStoreFailed) {
467                 Log("SHUTTLE",
468                         Form("ContinueProcessing - %s: Grid storage of one or more objects failed. Trying again now",
469                                 fCurrentDetector.Data()));
470                 if(TryToStoreAgain()){
471                         Log(fCurrentDetector.Data(), "ContinueProcessing - All objects successfully stored into OCDB");
472                         UpdateShuttleStatus(AliShuttleStatus::kDone);
473                         UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE");
474                 } else {
475                         Log("SHUTTLE",
476                                 Form("ContinueProcessing - %s: Grid storage failed again",
477                                         fCurrentDetector.Data()));
478                 }
479                 return kFALSE;
480         }
481
482         // if we get here, there is a restart
483
484         // abort conditions
485         // TODO we should add two counters, one for PP and one for DCS!
486         if (status->GetCount() >= fConfig->GetMaxRetries()) {
487                 Log("SHUTTLE",
488                         Form("ContinueProcessing - %s failed %d times in status %s - Updating Shuttle Logbook",
489                                 fCurrentDetector.Data(),
490                                 status->GetCount(), status->GetStatusName()));
491                 UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
492                 return kFALSE;
493         }
494
495         Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. Got stuck before in %s. Retry number %d.",
496                         fCurrentDetector.Data(),
497                         status->GetStatusName(), status->GetCount()));
498
499         UpdateShuttleStatus(AliShuttleStatus::kStarted, kTRUE);
500
501         return kTRUE;
502 }
503
504 //______________________________________________________________________________________________
505 Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
506 {
507         //
508         // Makes data retrieval for all detectors in the configuration.
509         // entry: Shuttle logbook entry, contains run paramenters and status of detectors
510         // (Unprocessed, Inactive, Failed or Done).
511         // Returns kFALSE in case of error occured and kTRUE otherwise
512         //
513
514         if(!entry) return kFALSE;
515
516         fLogbookEntry = entry;
517
518         if(fLogbookEntry->IsDone()){
519                 Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook");
520                 UpdateShuttleLogbook("shuttle_done");
521                 fLogbookEntry = 0;
522                 return kTRUE;
523         }
524
525
526         AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^* \n",
527                                         GetCurrentRun()));
528
529         fLogbookEntry->Print("");
530
531         // Initialization
532         Bool_t hasError = kFALSE;
533         for(Int_t iSys=0;iSys<3;iSys++) fFESCalled[iSys]=kFALSE;
534
535         AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
536         if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
537         AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
538         if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
539
540         // Loop on detectors in the configuration
541         TIter iter(fConfig->GetDetectors());
542         TObjString* aDetector = 0;
543
544         while ((aDetector = (TObjString*) iter.Next())) {
545                 fCurrentDetector = aDetector->String();
546
547                 if (!fConfig->HostProcessDetector(fCurrentDetector)) continue;
548
549                 AliPreprocessor* aPreprocessor =
550                         dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
551                 if(!aPreprocessor){
552                         Log("SHUTTLE",Form("Process: no preprocessor registered. Skipping %s", fCurrentDetector.Data()));
553                         continue;
554                 }
555
556                 if (ContinueProcessing() == kFALSE) continue;
557
558                 AliInfo(Form("\n\n \t\t\t****** run %d - %s: START  ******",
559                                                 GetCurrentRun(), aDetector->GetName()));
560
561
562     Int_t pid = fork();
563
564     if (pid < 0)
565     {
566       Log("SHUTTLE", "ERROR: Forking failed");
567     }
568     else if (pid > 0)
569     {
570       // parent
571       AliInfo(Form("In parent process of %d - %s: Starting monitoring", GetCurrentRun(), aDetector->GetName()));
572
573       Long_t begin = time(0);
574
575       int status; // to be used with waitpid, on purpose an int (not Int_t)!
576       while (waitpid(pid, &status, WNOHANG) == 0)
577       {
578         Long_t expiredTime = time(0) - begin;
579
580         if (expiredTime > fConfig->GetPPTimeOut())
581         {
582           Log("SHUTTLE", Form("Process time out. Run time: %d seconds. Killing...", expiredTime));
583
584           kill(pid, 9);
585
586           hasError = kTRUE;
587
588           gSystem->Sleep(1000);
589         }
590         else
591         {
592           if (expiredTime % 60 == 0)
593             Log("SHUTTLE", Form("Checked process. Run time: %d seconds.", expiredTime));
594
595           gSystem->Sleep(1000);
596         }
597       }
598
599       AliInfo(Form("In parent process of %d - %s: Client has terminated.", GetCurrentRun(), aDetector->GetName()));
600
601       if (WIFEXITED(status))
602       {
603         Int_t returnCode = WEXITSTATUS(status);
604
605         Log("SHUTTLE", Form("The return code is %d", returnCode));
606
607         if (returnCode != 0)
608           hasError = kTRUE;
609       }
610     }
611     else if (pid == 0)
612     {
613       // client
614       AliInfo(Form("In client process of %d - %s", GetCurrentRun(), aDetector->GetName()));
615
616       UInt_t result = ProcessCurrentDetector();
617
618       Int_t returnCode = 0; // will be set to 1 in case of an error
619
620       if (!result) {
621         returnCode = 1;
622         AliInfo(Form("\n \t\t\t****** run %d - %s: PREPROCESSOR ERROR ****** \n\n",
623                 GetCurrentRun(), aDetector->GetName()));
624       }
625       else if(result == 2) {
626         AliInfo(Form("\n \t\t\t****** run %d - %s: STORAGE ERROR ****** \n\n",
627                 GetCurrentRun(), aDetector->GetName()));
628       } else {
629         AliInfo(Form("\n \t\t\t****** run %d - %s: DONE ****** \n\n",
630                 GetCurrentRun(), aDetector->GetName()));
631       }
632
633       if (result > 0)
634       {
635         // Process successful: Update time_processed field in FES logbooks!
636         if(fFESCalled[kDAQ]) {
637           if (UpdateDAQTable() == kFALSE)
638             returnCode = 1;
639           fFESlist[kDAQ].Clear();
640         }
641         //if(fFESCalled[kDCS]) {
642         //  if (UpdateDCSTable(aDetector->GetName()) == kFALSE)
643         //    returnCode = 1;
644         //  fFESlist[kDCS].Clear();
645         //}
646         //if(fFESCalled[kHLT]) {
647         //  if (UpdateHLTTable(aDetector->GetName()) == kFALSE)
648         //    returnCode = 1;
649         //      fFESlist[kHLT].Clear();
650         //}
651       }
652
653       AliInfo(Form("Client process of %d - %s is exiting now with %d.", GetCurrentRun(), aDetector->GetName(), returnCode));
654
655       // the client exits here
656       gSystem->Exit(returnCode);
657
658       AliError("We should never get here!!!");
659     }
660         }
661
662         AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^* \n",
663                                                         GetCurrentRun()));
664
665         //check if shuttle is done for this run, if so update logbook
666         TObjArray checkEntryArray;
667         checkEntryArray.SetOwner(1);
668         TString whereClause = Form("where run=%d",GetCurrentRun());
669         if(QueryShuttleLogbook(whereClause.Data(), checkEntryArray)) {
670
671                 AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*>
672                                                         (checkEntryArray.At(0));
673
674                 if(checkEntry && checkEntry->IsDone()){
675                         Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook");
676                         UpdateShuttleLogbook("shuttle_done");
677                 }
678         }
679
680         fLogbookEntry = 0;
681
682         return hasError == kFALSE;
683 }
684
685 //______________________________________________________________________________________________
686 UInt_t AliShuttle::ProcessCurrentDetector()
687 {
688         //
689         // Makes data retrieval just for a specific detector (fCurrentDetector).
690         // Threre should be a configuration for this detector.
691
692         AliInfo(Form("Retrieving values for %s, run %d", fCurrentDetector.Data(), GetCurrentRun()));
693
694         UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
695
696         TString host(fConfig->GetDCSHost(fCurrentDetector));
697         Int_t port = fConfig->GetDCSPort(fCurrentDetector);
698
699         TIter iter(fConfig->GetDCSAliases(fCurrentDetector));
700         TObjString* anAlias;
701         TMap aliasMap;
702         aliasMap.SetOwner(1);
703
704         Bool_t aDCSError = kFALSE;
705         fGridError = kFALSE;
706
707         while ((anAlias = (TObjString*) iter.Next())) {
708                 TObjArray *valueSet = new TObjArray();
709                 valueSet->SetOwner(1);
710                 // TODO Test only... I've added a flag that allows to
711                 // exclude DCS archive DB query
712                 if(fgkProcessDCS){
713                         AliInfo("Querying DCS archive DB data...");
714                         aDCSError = (GetValueSet(host, port, anAlias->String(), valueSet) == 0);
715                 } else {
716                         AliInfo(Form("Skipping DCS processing. Port = %d",port));
717                         aDCSError = kFALSE;
718                 }
719                 if(!aDCSError) {
720                         aliasMap.Add(anAlias->Clone(), valueSet);
721                 }else{
722                         Log(fCurrentDetector, Form("ProcessCurrentDetector - Error while retrieving alias %s",
723                                         anAlias->GetName()));
724                         UpdateShuttleStatus(AliShuttleStatus::kDCSError, kTRUE);
725                         aliasMap.DeleteAll();
726                         return 0;
727                 }
728         }
729
730         // DCS Archive DB processing successful. Call Preprocessor!
731         UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
732
733         AliPreprocessor* aPreprocessor =
734                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
735
736         aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
737         UInt_t aPPResult = aPreprocessor->Process(&aliasMap);
738
739         UInt_t returnValue = 0;
740         if (aPPResult == 0) { // Preprocessor error
741                 UpdateShuttleStatus(AliShuttleStatus::kPPError);
742                 returnValue = 0;
743         } else if (fGridError == kFALSE) { // process and Grid storage ok!
744                 UpdateShuttleStatus(AliShuttleStatus::kDone);
745                 UpdateShuttleLogbook(fCurrentDetector, "DONE");
746                 Log(fCurrentDetector.Data(),
747                         "ProcessCurrentDetector - Preprocessor and Grid storage ended successfully");
748                 returnValue = 1;
749         } else { // Grid storage error (process ok, but object put in local storage)
750                 UpdateShuttleStatus(AliShuttleStatus::kStoreFailed);
751                 returnValue = 2;
752         }
753
754         aliasMap.DeleteAll();
755
756         return returnValue;
757 }
758
759 //______________________________________________________________________________________________
760 Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
761                 TObjArray& entries)
762 {
763 // Query DAQ's Shuttle logbook and fills detector status object.
764 // Call QueryRunParameters to query DAQ logbook for run parameters.
765
766         // check connection, in case connect
767         if(!Connect(kDAQ)) return kFALSE;
768
769         TString sqlQuery;
770         sqlQuery = Form("select * from logbook_shuttle %s order by run", whereClause);
771
772         TSQLResult* aResult = fServer[kDAQ]->Query(sqlQuery);
773         if (!aResult) {
774                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
775                 return kFALSE;
776         }
777
778         if(aResult->GetRowCount() == 0) {
779                 if(sqlQuery.Contains("where shuttle_done=0")){
780                         Log("SHUTTLE", "QueryShuttleLogbook - All runs in Shuttle Logbook are already DONE");
781                         delete aResult;
782                         return kTRUE;
783                 } else {
784                         AliError("No entries in Shuttle Logbook match request");
785                         delete aResult;
786                         return kFALSE;
787                 }
788         }
789
790         // TODO Check field count!
791         const UInt_t nCols = 24;
792         if (aResult->GetFieldCount() != (Int_t) nCols) {
793                 AliError("Invalid SQL result field number!");
794                 delete aResult;
795                 return kFALSE;
796         }
797
798         entries.SetOwner(1);
799
800         TSQLRow* aRow;
801         while ((aRow = aResult->Next())) {
802                 TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
803                 Int_t run = runString.Atoi();
804
805                 UInt_t startTime, endTime;
806                 if(!QueryRunParameters(run, startTime, endTime)) continue;
807
808                 const UInt_t nDet = AliShuttle::kNDetectors;
809                 AliShuttleLogbookEntry::Status detStatus[nDet];
810
811                 // loop on detectors
812                 for(UInt_t ii = 0; ii < nCols; ii++){
813                         TString detCode(aResult->GetFieldName(ii));
814                         Int_t detPos = AliShuttle::GetDetPos(detCode.Data());
815                         if(detPos < 0) continue;
816                         TString statusString(aRow->GetField(ii), aRow->GetFieldLength(ii));
817                         if(statusString == "UNPROCESSED"){
818                                 detStatus[detPos] = AliShuttleLogbookEntry::kUnprocessed;
819                         } else if (statusString == "INACTIVE") {
820                                 detStatus[detPos] = AliShuttleLogbookEntry::kInactive;
821                         } else if (statusString == "FAILED") {
822                                 detStatus[detPos] = AliShuttleLogbookEntry::kFailed;
823                         } else if (statusString == "DONE") {
824                                 detStatus[detPos] = AliShuttleLogbookEntry::kDone;
825                         }
826                 }
827
828                 entries.AddLast(new AliShuttleLogbookEntry(run, startTime, endTime, detStatus));
829                 delete aRow;
830         }
831
832         if(sqlQuery.Contains("where shuttle_done=0"))
833                 Log("SHUTTLE", Form("QueryShuttleLogbook - Found %d unprocessed runs in Shuttle Logbook",
834                                                         entries.GetEntriesFast()));
835         delete aResult;
836         return kTRUE;
837 }
838
839 //______________________________________________________________________________________________
840 Bool_t AliShuttle::QueryRunParameters(Int_t& run, UInt_t& startTime, UInt_t& endTime)
841 {
842 // Retrieve start time and end time for run in the DAQ logbook
843
844         // check connection, in case connect
845         if(!Connect(kDAQ)) return kFALSE;
846
847         TString sqlQuery;
848         sqlQuery = Form("select time_start, time_end from logbook where run=%d", run);
849
850         TSQLResult* aResult = fServer[kDAQ]->Query(sqlQuery);
851         if (!aResult) {
852                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
853                 return kFALSE;
854         }
855
856         if(aResult->GetRowCount() == 0) {
857                 Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run));
858                 delete aResult;
859                 return kFALSE;
860         }
861
862         if(aResult->GetRowCount() > 1) {
863                 AliError(Form("More than one entry in DAQ Logbook for run %d. Skipping", run));
864                 delete aResult;
865                 return kFALSE;
866         }
867
868         TSQLRow* aRow;
869         while ((aRow = aResult->Next())) {
870
871                 TString startTimeString(aRow->GetField(0),
872                                 aRow->GetFieldLength(0));
873                 startTime = startTimeString.Atoi();
874                 TString endTimeString(aRow->GetField(1),
875                                 aRow->GetFieldLength(1));
876                 endTime = endTimeString.Atoi();
877
878                 if (!startTime || !endTime || startTime > endTime) {
879                         Log("SHUTTLE",
880                                 Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d",
881                                         run, startTime, endTime));
882                         delete aRow;
883                         delete aResult;
884                         return kFALSE;
885                 }
886
887                 delete aRow;
888         }
889
890         delete aResult;
891         return kTRUE;
892 }
893
894 //______________________________________________________________________________________________
895 Bool_t AliShuttle::TryToStoreAgain()
896 {
897   // Called in case the detector failed to store the object in Grid OCDB
898   // It tries to store the object again, if it does not find more recent and overlapping objects
899   // Calls underlying TryToStoreAgain(const char*) function twice, for OCDB and Reference storage.
900
901         AliInfo("Trying to store OCDB data again...");
902         Bool_t resultCDB = TryToStoreAgain(fgkMainCDB);
903
904         AliInfo("Trying to store reference data again...");
905         Bool_t resultRef = TryToStoreAgain(fgkMainRefStorage);
906
907         return resultCDB && resultRef;
908 }
909
910 //______________________________________________________________________________________________
911 Bool_t AliShuttle::TryToStoreAgain(TString& gridURI)
912 {
913   // Called by TryToStoreAgain(), performs actual storage retry
914
915         TObjArray* gridIds=0;
916
917         Bool_t result = kTRUE;
918
919         const char* type = 0;
920         TString backupURI;
921         if(gridURI == fgkMainCDB) {
922                 type = "OCDB";
923                 backupURI = fgkLocalCDB;
924         } else if(gridURI == fgkMainRefStorage) {
925                 type = "reference";
926                 backupURI = fgkLocalRefStorage;
927         } else {
928                 AliError(Form("Invalid storage URI: %s", gridURI.Data()));
929                 return kFALSE;
930         }
931
932         AliCDBManager* man = AliCDBManager::Instance();
933
934         AliCDBStorage *gridSto = man->GetStorage(gridURI);
935         if(!gridSto) {
936                 Log(fCurrentDetector.Data(),
937                         Form("TryToStoreAgain - cannot activate main %s storage", type));
938                 return kFALSE;
939         }
940
941         gridIds = gridSto->GetQueryCDBList();
942
943         // get objects previously stored in local CDB
944         AliCDBStorage *backupSto = man->GetStorage(backupURI);
945         AliCDBPath aPath(fCurrentDetector,"*","*");
946         // Local objects were stored with current run as Grid version!
947         TList* localEntries = backupSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun());
948         localEntries->SetOwner(1);
949
950         // loop on local stored objects
951         TIter localIter(localEntries);
952         AliCDBEntry *aLocEntry = 0;
953         while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){
954                 aLocEntry->SetOwner(1);
955                 AliCDBId aLocId = aLocEntry->GetId();
956                 aLocEntry->SetVersion(-1);
957                 aLocEntry->SetSubVersion(-1);
958
959                 // loop on Grid valid Id's
960                 Bool_t store = kTRUE;
961                 TIter gridIter(gridIds);
962                 AliCDBId* aGridId = 0;
963                 while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
964                         // If local object is valid up to infinity we store it anyway
965                         // TODO This does not work! It may hide more recent objects...
966                         if(aLocId.GetLastRun() == AliCDBRunRange::Infinity()) {
967                                 // TODO Check that it won't hide more recent files! how????
968                                 break;
969                         }
970                         if(aGridId->GetPath() != aLocId.GetPath()) continue;
971                         // skip all objects valid up to infinity
972                         if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
973                         // if we get here, it means there's already some more recent object stored on Grid!
974                         store = kFALSE;
975                         break;
976                 }
977
978                 if(!store){
979                         Log(fCurrentDetector.Data(),
980                                 Form("TryToStoreAgain - A more recent object already exists in %s storage: <%s>",
981                                         type, aGridId->ToString().Data()));
982                         // removing local filename...
983                         // TODO maybe it's better not to remove it, it was not copied to the Grid!
984                         TString filename;
985                         backupSto->IdToFilename(aLocId, filename);
986                         AliInfo(Form("Removing local file %s", filename.Data()));
987                         gSystem->Exec(Form("rm %s",filename.Data()));
988                         continue;
989                 }
990
991                 // If we get here, the file can be stored!
992                 Bool_t storeOk = gridSto->Put(aLocEntry);
993                 if(storeOk){
994                         Log(fCurrentDetector.Data(),
995                                 Form("TryToStoreAgain - Object <%s> successfully put into %s storage",
996                                         aLocId.ToString().Data(), type));
997
998                         // removing local filename...
999                         TString filename;
1000                         backupSto->IdToFilename(aLocId, filename);
1001                         AliInfo(Form("Removing local file %s", filename.Data()));
1002                         gSystem->Exec(Form("rm %s", filename.Data()));
1003                         continue;
1004                 } else  {
1005                         Log(fCurrentDetector.Data(),
1006                                 Form("TryToStoreAgain - Grid %s storage of object <%s> failed again",
1007                                         type, aLocId.ToString().Data()));
1008                         result = kFALSE;
1009                 }
1010         }
1011         localEntries->Clear();
1012
1013         return result;
1014 }
1015
1016 //______________________________________________________________________________________________
1017 Bool_t AliShuttle::GetValueSet(const char* host, Int_t port, const char* alias,
1018                                 TObjArray* valueSet)
1019 {
1020 // Retrieve all "alias" data points from the DCS server
1021 // host, port: TSocket connection parameters
1022 // alias: name of the alias
1023 // valueSet: array of retrieved AliDCSValue's
1024
1025         AliDCSClient client(host, port, fTimeout, fRetries);
1026         if (!client.IsConnected()) {
1027                 return kFALSE;
1028         }
1029
1030         Int_t result = client.GetAliasValues(alias,
1031                 GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1032
1033         if (result < 0) {
1034                 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get '%s'! Reason: %s",
1035                         alias, AliDCSClient::GetErrorString(result)));
1036
1037                 if (result == AliDCSClient::fgkServerError) {
1038                         Log(fCurrentDetector.Data(), Form("GetValueSet - Server error: %s",
1039                                 client.GetServerError().Data()));
1040                 }
1041
1042                 return kFALSE;
1043         }
1044
1045         return kTRUE;
1046 }
1047
1048 //______________________________________________________________________________________________
1049 const char* AliShuttle::GetFile(Int_t system, const char* detector,
1050                 const char* id, const char* source)
1051 {
1052 // Get calibration file from file exchange servers
1053 // calls specific getter according to system index (kDAQ, kDCS, kHLT)
1054
1055         switch(system){
1056                 case kDAQ:
1057                         return GetDAQFileName(detector, id, source);
1058                         break;
1059                 case kDCS:
1060                         return GetDCSFileName(detector, id, source);
1061                         break;
1062                 case kHLT:
1063                         return GetHLTFileName(detector, id, source);
1064                         break;
1065                 default:
1066                         AliError(Form("No valid system index: %d",system));
1067         }
1068
1069         return 0;
1070 }
1071
1072 //______________________________________________________________________________________________
1073 TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
1074 {
1075 // Get sources producing the condition file Id from file exchange servers
1076 // calls specific getter according to system index (kDAQ, kDCS, kHLT)
1077
1078         switch(system){
1079                 case kDAQ:
1080                         return GetDAQFileSources(detector, id);
1081                         break;
1082                 case kDCS:
1083                         return GetDCSFileSources(detector, id);
1084                         break;
1085                 case kHLT:
1086                         return GetHLTFileSources(detector, id);
1087                         break;
1088                 default:
1089                         AliError(Form("No valid system index: %d",system));
1090         }
1091
1092         return NULL;
1093 }
1094
1095 //______________________________________________________________________________________________
1096 Bool_t AliShuttle::Connect(Int_t system)
1097 {
1098 // Connect to MySQL Server of the system's FES logbook
1099 // DAQ Logbook, Shuttle Logbook and DAQ FES Logbook are on the same host
1100
1101         // check connection: if already connected return
1102         if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
1103
1104         TString aFESlbHost= Form("mysql://%s", fConfig->GetFESlbHost(system));
1105
1106         fServer[system] = TSQLServer::Connect(aFESlbHost,
1107                         fConfig->GetFESlbUser(system),
1108                         fConfig->GetFESlbPass(system));
1109         if (!fServer[system] || !fServer[system]->IsConnected()) {
1110                 AliError(Form("Can't establish connection to FES logbook for %s",fkSystemNames[system]));
1111                 if(fServer[system]) delete fServer[system];
1112                 return kFALSE;
1113         }
1114
1115         // Get tables
1116         // TODO in the configuration should the table name be there too?
1117         TSQLResult* aResult=0;
1118         switch(system){
1119                 case kDAQ:
1120                         aResult = fServer[kDAQ]->GetTables("REFSYSLOG");
1121                         break;
1122                 case kDCS:
1123                         //aResult = fServer[kDCS]->GetTables("REFSYSLOG");
1124                         break;
1125                 case kHLT:
1126                         //aResult = fServer[kHLT]->GetTables("REFSYSLOG");
1127                         break;
1128                 default:
1129                         break;
1130         }
1131
1132         delete aResult;
1133         return kTRUE;
1134 }
1135
1136 //______________________________________________________________________________________________
1137 const char* AliShuttle::GetDAQFileName(const char* detector, const char* id, const char* source)
1138 {
1139 // Retrieves a file from the DAQ FES.
1140 // First queris the DAQ logbook_fs for the DAQ file name, using the run, detector, id and source info
1141 // then calls RetrieveDAQFile(DAQfilename) for actual copy to local disk
1142 // run: current run being processed (given by Logbook entry fLogbookEntry)
1143 // detector: comes from the Preprocessor name (must be converted into detector code with GetDetCode)
1144 // id: provided as a parameter by the Preprocessor
1145 // source: provided by the Preprocessor through GetFileSources function
1146
1147         // check connection, in case connect
1148         if(!Connect(kDAQ)){
1149                 Log(detector, "GetDAQFileName - Couldn't connect to DAQ Logbook");
1150                 return 0;
1151         }
1152
1153         // Query preparation
1154         TString sqlQueryStart = "select filePath from logbook_fs where";
1155         TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\"",
1156                                 GetCurrentRun(), GetDetCode(detector), id, source);
1157         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1158
1159         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1160
1161         // Query execution
1162         TSQLResult* aResult = 0;
1163         aResult = dynamic_cast<TSQLResult*> (fServer[kDAQ]->Query(sqlQuery));
1164         if (!aResult) {
1165                 Log(detector, Form("GetDAQFileName - Can't execute SQL query for: id = %s, source = %s",
1166                                 id, source));
1167                 return 0;
1168         }
1169
1170         if (aResult->GetRowCount() == 0) {
1171                 Log(detector,
1172                         Form("GetDAQFileName - No entry in FES table for: id = %s, source = %s",
1173                                 id, source));
1174                 delete aResult;
1175                 return 0;
1176         }
1177
1178         if (aResult->GetRowCount() >1) {
1179                 Log(detector,
1180                         Form("GetDAQFileName - More than one entry in FES table for: id = %s, source = %s",
1181                                 id, source));
1182                 delete aResult;
1183                 return 0;
1184         }
1185
1186         TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
1187
1188         if(!aRow){
1189                 Log(detector, Form("GetDAQFileName - Empty set result from query: id = %s, source = %s",
1190                                 id, source));
1191                 delete aResult;
1192                 return 0;
1193         }
1194
1195         TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
1196
1197         delete aResult;
1198         delete aRow;
1199
1200         AliDebug(2, Form("filePath = %s",filePath.Data()));
1201
1202         // retrieved file is renamed to make it unique
1203         TString localFileName = Form("%s_%d_%s_%s.shuttle",
1204                                         detector, GetCurrentRun(), id, source);
1205
1206         // file retrieval from DAQ FES
1207         Bool_t result = RetrieveDAQFile(filePath.Data(), localFileName.Data());
1208         if(!result) {
1209                 Log(detector, Form("GetDAQFileName - Copy of file %s from DAQ FES failed", filePath.Data()));
1210                 return 0;
1211         } else {
1212                 AliInfo(Form("File %s copied from DAQ FES into %s/%s",
1213                         filePath.Data(), fgkShuttleTempDir, localFileName.Data()));
1214         }
1215
1216
1217         fFESCalled[kDAQ]=kTRUE;
1218         TObjString *fileParams = new TObjString(Form("%s_!?!_%s", id, source));
1219         fFESlist[kDAQ].Add(fileParams);
1220
1221         return localFileName.Data();
1222
1223 }
1224
1225 //______________________________________________________________________________________________
1226 Bool_t AliShuttle::RetrieveDAQFile(const char* daqFileName, const char* localFileName)
1227 {
1228
1229         // check temp directory: trying to cd to temp; if it does not exist, create it
1230         AliDebug(2, Form("Copy file %s from DAQ FES into folder %s and rename it as %s",
1231                         daqFileName,fgkShuttleTempDir, localFileName));
1232
1233         void* dir = gSystem->OpenDirectory(fgkShuttleTempDir);
1234         if (dir == NULL) {
1235                 if (gSystem->mkdir(fgkShuttleTempDir, kTRUE)) {
1236                         AliError(Form("Can't open directory <%s>", fgkShuttleTempDir));
1237                         return kFALSE;
1238                 }
1239
1240         } else {
1241                 gSystem->FreeDirectory(dir);
1242         }
1243
1244         TString baseDAQFESFolder = "DAQ";
1245         TString command = Form("scp %s@%s:%s/%s %s/%s",
1246                 fConfig->GetFESUser(kDAQ),
1247                 fConfig->GetFESHost(kDAQ),
1248                 baseDAQFESFolder.Data(),
1249                 daqFileName,
1250                 fgkShuttleTempDir,
1251                 localFileName);
1252
1253         AliDebug(2, Form("%s",command.Data()));
1254
1255         UInt_t nRetries = 0;
1256         UInt_t maxRetries = 3;
1257
1258         // copy!! if successful TSystem::Exec returns 0
1259         while(nRetries++ < maxRetries) {
1260                 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
1261                 if(gSystem->Exec(command.Data()) == 0) return kTRUE;
1262         }
1263
1264         return kFALSE;
1265
1266 }
1267
1268 //______________________________________________________________________________________________
1269 TList* AliShuttle::GetDAQFileSources(const char* detector, const char* id)
1270 {
1271 // Retrieves a file from the DCS FES.
1272
1273         // check connection, in case connect
1274         if(!Connect(kDAQ)){
1275                 Log(detector, "GetDAQFileSources - Couldn't connect to DAQ Logbook");
1276                 return 0;
1277         }
1278
1279         // Query preparation
1280         TString sqlQueryStart = "select DAQsource from logbook_fs where";
1281         TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1282                                 GetCurrentRun(), GetDetCode(detector), id);
1283         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1284
1285         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1286
1287         // Query execution
1288         TSQLResult* aResult;
1289         aResult = fServer[kDAQ]->Query(sqlQuery);
1290         if (!aResult) {
1291                 Log(detector, Form("GetDAQFileSources - Can't execute SQL query for id: %s", id));
1292                 return 0;
1293         }
1294
1295         if (aResult->GetRowCount() == 0) {
1296                 Log(detector,
1297                         Form("GetDAQFileSources - No entry in FES table for id: %s", id));
1298                 delete aResult;
1299                 return 0;
1300         }
1301
1302         TSQLRow* aRow;
1303         TList *list = new TList();
1304         list->SetOwner(1);
1305
1306         while((aRow = aResult->Next())){
1307
1308                 TString daqSource(aRow->GetField(0), aRow->GetFieldLength(0));
1309                 AliDebug(2, Form("daqSource = %s", daqSource.Data()));
1310                 list->Add(new TObjString(daqSource));
1311                 delete aRow;
1312         }
1313         delete aResult;
1314
1315         return list;
1316
1317 }
1318
1319 //______________________________________________________________________________________________
1320 const char* AliShuttle::GetDCSFileName(const char* /*detector*/, const char* /*id*/, const char* /*source*/){
1321 // Retrieves a file from the DCS FES.
1322
1323 return "You're in DCS";
1324
1325 }
1326
1327 //______________________________________________________________________________________________
1328 TList* AliShuttle::GetDCSFileSources(const char* /*detector*/, const char* /*id*/){
1329 // Retrieves a file from the DCS FES.
1330
1331 return NULL;
1332
1333 }
1334
1335 //______________________________________________________________________________________________
1336 const char* AliShuttle::GetHLTFileName(const char* /*detector*/, const char* /*id*/, const char* /*source*/){
1337 // Retrieves a file from the HLT FES.
1338
1339 return "You're in HLT";
1340
1341 }
1342
1343 //______________________________________________________________________________________________
1344 TList* AliShuttle::GetHLTFileSources(const char* /*detector*/, const char* /*id*/){
1345 // Retrieves a file from the HLT FES.
1346
1347 return NULL;
1348
1349 }
1350
1351 //______________________________________________________________________________________________
1352 Bool_t AliShuttle::UpdateDAQTable()
1353 {
1354 // Update DAQ table filling time_processed field in all rows corresponding to current run and detector
1355
1356         // check connection, in case connect
1357         if(!Connect(kDAQ)){
1358                 Log(fCurrentDetector, "UpdateDAQTable - Couldn't connect to DAQ Logbook");
1359                 return kFALSE;
1360         }
1361
1362         TTimeStamp now; // now
1363
1364         // Loop on FES list entries
1365         TIter iter(&fFESlist[kDAQ]);
1366         TObjString *aFESentry=0;
1367         while((aFESentry = dynamic_cast<TObjString*> (iter.Next()))){
1368                 TString aFESentrystr = aFESentry->String();
1369                 TObjArray *aFESarray = aFESentrystr.Tokenize("_!?!_");
1370                 if(!aFESarray || aFESarray->GetEntries() != 2 ) {
1371                         Log(fCurrentDetector, Form("UpdateDAQTable - error updating FES entry. Check string: <%s>",
1372                                 aFESentrystr.Data()));
1373                         if(aFESarray) delete aFESarray;
1374                         return kFALSE;
1375                 }
1376                 const char* fileId = ((TObjString*) aFESarray->At(0))->GetName();
1377                 const char* daqSource = ((TObjString*) aFESarray->At(1))->GetName();
1378                 TString whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
1379                         GetCurrentRun(), GetDetCode(fCurrentDetector), fileId, daqSource);
1380
1381                 delete aFESarray;
1382
1383                 TString sqlQuery = Form("update logbook_fs set time_processed=%d %s", now.GetSec(), whereClause.Data());
1384
1385                 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1386
1387                 // Query execution
1388                 TSQLResult* aResult;
1389                 aResult = dynamic_cast<TSQLResult*> (fServer[kDAQ]->Query(sqlQuery));
1390                 if (!aResult) {
1391                         Log(fCurrentDetector, Form("UpdateDAQTable - Can't execute SQL query <%s>", sqlQuery.Data()));
1392                         return kFALSE;
1393                 }
1394                 delete aResult;
1395         }
1396
1397         return kTRUE;
1398 }
1399
1400
1401 //______________________________________________________________________________________________
1402 Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
1403 {
1404 // Update Shuttle logbook filling detector or shuttle_done column
1405 // ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
1406
1407         // check connection, in case connect
1408         if(!Connect(kDAQ)){
1409                 Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook.");
1410                 return kFALSE;
1411         }
1412
1413         TString detName(detector);
1414         TString setClause;
1415         if(detName == "shuttle_done") {
1416                 setClause = "set shuttle_done=1";
1417         } else {
1418                 TString detCode = GetDetCode(detector);
1419                 if(detCode.IsNull()) {
1420                         Log("SHUTTLE", Form("UpdateShuttleLogbook - Unknown detector %s", detector));
1421                         return kFALSE;
1422                 }
1423                 TString statusStr(status);
1424                 if(statusStr.Contains("done", TString::kIgnoreCase) ||
1425                    statusStr.Contains("failed", TString::kIgnoreCase)){
1426                         setClause = Form("set %s=\"%s\"", detCode.Data(), status);
1427                 } else {
1428                         Log("SHUTTLE",
1429                                 Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s",
1430                                         status, detector));
1431                         return kFALSE;
1432                 }
1433         }
1434
1435         TString whereClause = Form("where run=%d", GetCurrentRun());
1436
1437         TString sqlQuery = Form("update logbook_shuttle %s %s",
1438                                         setClause.Data(), whereClause.Data());
1439
1440         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1441
1442         // Query execution
1443         TSQLResult* aResult;
1444         aResult = dynamic_cast<TSQLResult*> (fServer[kDAQ]->Query(sqlQuery));
1445         if (!aResult) {
1446                 Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data()));
1447                 return kFALSE;
1448         }
1449         delete aResult;
1450
1451         return kTRUE;
1452 }
1453
1454 //______________________________________________________________________________________________
1455 Int_t AliShuttle::GetCurrentRun() const
1456 {
1457 // Get current run from logbook entry
1458
1459         return fLogbookEntry ? fLogbookEntry->GetRun() : -1;
1460 }
1461
1462 //______________________________________________________________________________________________
1463 UInt_t AliShuttle::GetCurrentStartTime() const
1464 {
1465 // get current start time
1466
1467         return fLogbookEntry ? fLogbookEntry->GetStartTime() : 0;
1468 }
1469
1470 //______________________________________________________________________________________________
1471 UInt_t AliShuttle::GetCurrentEndTime() const
1472 {
1473 // get current end time from logbook entry
1474
1475         return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
1476 }
1477
1478 //______________________________________________________________________________________________
1479 const char* AliShuttle::GetDetCode(const char* detector){
1480 // Return detector code
1481
1482         for(UInt_t iDet=0; iDet < kNDetectors; iDet++){
1483                 if(!strcmp(fgkDetectorName[iDet], detector)) return fgkDetectorCode[iDet];
1484         }
1485
1486         AliErrorClass(Form("Unknown detector: %s",detector));
1487         return 0;
1488 }
1489
1490 //______________________________________________________________________________________________
1491 const char* AliShuttle::GetDetCode(UInt_t detPos){
1492 // Return detector code
1493
1494         if( detPos >= kNDetectors) {
1495                 AliErrorClass(Form("Invalid parameter: %d", detPos));
1496                 return 0;
1497         }
1498         return fgkDetectorCode[detPos];
1499 }
1500
1501 //______________________________________________________________________________________________
1502 const Int_t AliShuttle::GetDetPos(const char* detCode){
1503 // Return detector position in the detector code array
1504
1505         for(UInt_t iDet=0; iDet < kNDetectors; iDet++){
1506                 if(!strcmp(fgkDetectorCode[iDet], detCode)) return iDet;
1507         }
1508         return -1;
1509 }
1510
1511 //______________________________________________________________________________________________
1512 void AliShuttle::Log(const char* detector, const char* message)
1513 {
1514 // Fill log string with a message
1515
1516         void* dir = gSystem->OpenDirectory(fgkShuttleLogDir);
1517         if (dir == NULL) {
1518                 if (gSystem->mkdir(fgkShuttleLogDir, kTRUE)) {
1519                         AliError(Form("Can't open directory <%s>", fgkShuttleTempDir));
1520                         return;
1521                 }
1522
1523         } else {
1524                 gSystem->FreeDirectory(dir);
1525         }
1526
1527         TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
1528         if(GetCurrentRun()>=0 ) toLog += Form("run %d - ", GetCurrentRun());
1529         toLog += Form("%s", message);
1530
1531         AliInfo(toLog.Data());
1532
1533         TString fileName;
1534         fileName.Form("%s/%s.log", fgkShuttleLogDir, detector);
1535         gSystem->ExpandPathName(fileName);
1536
1537         ofstream logFile;
1538         logFile.open(fileName, ofstream::out | ofstream::app);
1539
1540         if (!logFile.is_open()) {
1541                 AliError(Form("Could not open file %s", fileName.Data()));
1542                 return;
1543         }
1544
1545         logFile << toLog.Data() << "\n";
1546
1547         logFile.close();
1548 }
1549
1550 //______________________________________________________________________________________________
1551 Bool_t AliShuttle::Collect(Int_t run)
1552 {
1553         //
1554         // Collects conditions data for all UNPROCESSED run written to DAQ LogBook in case of run = -1 (default)
1555   // If a dedicated run is given this run is processed
1556   //
1557         // In operational mode, this is the Shuttle function triggered by the EOR signal.
1558         //
1559
1560   if (run == -1)
1561         Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs");
1562   else
1563         Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run));
1564
1565         SetLastAction("Starting");
1566
1567         TString whereClause("where shuttle_done=0");
1568   if (run != -1)
1569     whereClause += Form(" and run=%d", run);
1570
1571         TObjArray shuttleLogbookEntries;
1572         if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries)) {
1573                 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
1574                 return kFALSE;
1575         }
1576
1577         if (!RetrieveConditionsData(shuttleLogbookEntries)) {
1578                 Log("SHUTTLE", "Collect - Process of at least one run failed");
1579                 return kFALSE;
1580         }
1581
1582   return kTRUE;
1583 }
1584
1585 //______________________________________________________________________________________________
1586 Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries)
1587 {
1588 // Retrieve conditions data for all runs that aren't processed yet
1589
1590         Bool_t hasError = kFALSE;
1591
1592         TIter iter(&dateEntries);
1593         AliShuttleLogbookEntry* anEntry;
1594
1595         while ((anEntry = (AliShuttleLogbookEntry*) iter.Next())){
1596                 if (!Process(anEntry)){
1597                         hasError = kTRUE;
1598                 }
1599         }
1600
1601         return hasError == kFALSE;
1602 }
1603
1604 //______________________________________________________________________________________________
1605 ULong_t AliShuttle::GetTimeOfLastAction() const
1606 {
1607         ULong_t tmp;
1608         
1609         fMonitoringMutex->Lock();
1610         
1611         tmp = fLastActionTime;
1612         
1613         fMonitoringMutex->UnLock();
1614         
1615         return tmp;
1616 }
1617
1618 //______________________________________________________________________________________________
1619 const TString AliShuttle::GetLastAction() const
1620 {
1621         // returns a string description of the last action
1622
1623         TString tmp;
1624         
1625         fMonitoringMutex->Lock();
1626         
1627         tmp = fLastAction;
1628         
1629         fMonitoringMutex->UnLock();
1630
1631         return tmp;     
1632 }
1633
1634 //______________________________________________________________________________________________
1635 void AliShuttle::SetLastAction(const char* action)
1636 {
1637         // updates the monitoring variables
1638         
1639         fMonitoringMutex->Lock();
1640         
1641         fLastAction = action;
1642         fLastActionTime = time(0);
1643         
1644         fMonitoringMutex->UnLock();
1645 }