New developments of the analysis framework - selectorised version of the manager...
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttle.cxx
1 /**************************************************************************
2  * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
3  *                                                                        *
4  * Author: The ALICE Off-line Project.                                    *
5  * Contributors are mentioned in the code where appropriate.              *
6  *                                                                        *
7  * Permission to use, copy, modify and distribute this software and its   *
8  * documentation strictly for non-commercial purposes is hereby granted   *
9  * without fee, provided that the above copyright notice appears in all   *
10  * copies and that both the copyright notice and this permission notice   *
11  * appear in the supporting documentation. The authors make no claims     *
12  * about the suitability of this software for any purpose. It is          *
13  * provided "as is" without express or implied warranty.                  *
14  **************************************************************************/
15
16 /*
17 $Log$
18 Revision 1.19  2006/11/06 14:23:04  jgrosseo
19 major update (Alberto)
20 o) reading of run parameters from the logbook
21 o) online offline naming conversion
22 o) standalone DCSclient package
23
24 Revision 1.18  2006/10/20 15:22:59  jgrosseo
25 o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
26 o) Merging Collect, CollectAll, CollectNew function
27 o) Removing implementation of empty copy constructors (declaration still there!)
28
29 Revision 1.17  2006/10/05 16:20:55  jgrosseo
30 adapting to new CDB classes
31
32 Revision 1.16  2006/10/05 15:46:26  jgrosseo
33 applying to the new interface
34
35 Revision 1.15  2006/10/02 16:38:39  jgrosseo
36 update (alberto):
37 fixed memory leaks
38 storing of objects that failed to be stored to the grid before
39 interfacing of shuttle status table in daq system
40
41 Revision 1.14  2006/08/29 09:16:05  jgrosseo
42 small update
43
44 Revision 1.13  2006/08/15 10:50:00  jgrosseo
45 effc++ corrections (alberto)
46
47 Revision 1.12  2006/08/08 14:19:29  jgrosseo
48 Update to shuttle classes (Alberto)
49
50 - Possibility to set the full object's path in the Preprocessor's and
51 Shuttle's  Store functions
52 - Possibility to extend the object's run validity in the same classes
53 ("startValidity" and "validityInfinite" parameters)
54 - Implementation of the StoreReferenceData function to store reference
55 data in a dedicated CDB storage.
56
57 Revision 1.11  2006/07/21 07:37:20  jgrosseo
58 last run is stored after each run
59
60 Revision 1.10  2006/07/20 09:54:40  jgrosseo
61 introducing status management: The processing per subdetector is divided into several steps,
62 after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
63 can keep track of the number of failures and skips further processing after a certain threshold is
64 exceeded. These thresholds can be configured in LDAP.
65
66 Revision 1.9  2006/07/19 10:09:55  jgrosseo
67 new configuration, accesst to DAQ FES (Alberto)
68
69 Revision 1.8  2006/07/11 12:44:36  jgrosseo
70 adding parameters for extended validity range of data produced by preprocessor
71
72 Revision 1.7  2006/07/10 14:37:09  jgrosseo
73 small fix + todo comment
74
75 Revision 1.6  2006/07/10 13:01:41  jgrosseo
76 enhanced storing of last sucessfully processed run (alberto)
77
78 Revision 1.5  2006/07/04 14:59:57  jgrosseo
79 revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
80
81 Revision 1.4  2006/06/12 09:11:16  jgrosseo
82 coding conventions (Alberto)
83
84 Revision 1.3  2006/06/06 14:26:40  jgrosseo
85 o) removed files that were moved to STEER
86 o) shuttle updated to follow the new interface (Alberto)
87
88 Revision 1.2  2006/03/07 07:52:34  hristov
89 New version (B.Yordanov)
90
91 Revision 1.6  2005/11/19 17:19:14  byordano
92 RetrieveDATEEntries and RetrieveConditionsData added
93
94 Revision 1.5  2005/11/19 11:09:27  byordano
95 AliShuttle declaration added
96
97 Revision 1.4  2005/11/17 17:47:34  byordano
98 TList changed to TObjArray
99
100 Revision 1.3  2005/11/17 14:43:23  byordano
101 import to local CVS
102
103 Revision 1.1.1.1  2005/10/28 07:33:58  hristov
104 Initial import as subdirectory in AliRoot
105
106 Revision 1.2  2005/09/13 08:41:15  byordano
107 default startTime endTime added
108
109 Revision 1.4  2005/08/30 09:13:02  byordano
110 some docs added
111
112 Revision 1.3  2005/08/29 21:15:47  byordano
113 some docs added
114
115 */
116
//
// This class is the main manager for AliShuttle.
// It organizes the data retrieval from DCS and calls the
// interface methods of AliPreprocessor.
// For every detector in AliShuttleConfig (see AliShuttleConfig),
// data for its set of aliases is retrieved. If there is a registered
// AliPreprocessor for this detector then it will be used
// according to the schema (see AliPreprocessor).
// If there isn't a registered AliPreprocessor then the retrieved
// data is stored automatically to the underlying AliCDBStorage.
// The alias name is used as detSpec.
//
129
130 #include "AliShuttle.h"
131
132 #include "AliCDBManager.h"
133 #include "AliCDBStorage.h"
134 #include "AliCDBId.h"
135 #include "AliCDBRunRange.h"
136 #include "AliCDBPath.h"
137 #include "AliCDBEntry.h"
138 #include "AliShuttleConfig.h"
139 #include "DCSClient/AliDCSClient.h"
140 #include "AliLog.h"
141 #include "AliPreprocessor.h"
142 #include "AliShuttleStatus.h"
143 #include "AliShuttleLogbookEntry.h"
144
145 #include <TSystem.h>
146 #include <TObject.h>
147 #include <TString.h>
148 #include <TTimeStamp.h>
149 #include <TObjString.h>
150 #include <TSQLServer.h>
151 #include <TSQLResult.h>
152 #include <TSQLRow.h>
153 #include <TMutex.h>
154
155 #include <fstream>
156
157 #include <sys/types.h>
158 #include <sys/wait.h>
159
160 ClassImp(AliShuttle)
161
162 TString AliShuttle::fgkMainCDB("alien://folder=ShuttleCDB");
163 TString AliShuttle::fgkLocalCDB("local://LocalShuttleCDB");
164 TString AliShuttle::fgkMainRefStorage("alien://folder=ShuttleReference");
165 TString AliShuttle::fgkLocalRefStorage("local://LocalReferenceStorage");
166
167 Bool_t AliShuttle::fgkProcessDCS(kTRUE); 
168
169 const char* AliShuttle::fgkShuttleTempDir = gSystem->ExpandPathName("$ALICE_ROOT/SHUTTLE/temp");
170 const char* AliShuttle::fgkShuttleLogDir = gSystem->ExpandPathName("$ALICE_ROOT/SHUTTLE/log");
171
172 //______________________________________________________________________________________________
173 AliShuttle::AliShuttle(const AliShuttleConfig* config,
174                 UInt_t timeout, Int_t retries):
175 fConfig(config),
176 fTimeout(timeout), fRetries(retries),
177 fPreprocessorMap(),
178 fLogbookEntry(0),
179 fCurrentDetector(),
180 fStatusEntry(0),
181 fGridError(kFALSE),
182 fMonitoringMutex(0),
183 fLastActionTime(0),
184 fLastAction()
185 {
186         //
187         // config: AliShuttleConfig used
188         // timeout: timeout used for AliDCSClient connection
189         // retries: the number of retries in case of connection error.
190         //
191
192         if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
193         for(int iSys=0;iSys<4;iSys++) {
194                 fServer[iSys]=0;
195                 if (iSys < 3)
196                         fFESlist[iSys].SetOwner(kTRUE);
197         }
198         fPreprocessorMap.SetOwner(kTRUE);
199
200         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
201                 fFirstUnprocessed[iDet] = kFALSE;
202
203         fMonitoringMutex = new TMutex();
204 }
205
206 //______________________________________________________________________________________________
207 AliShuttle::~AliShuttle()
208 {
209 // destructor
210
211         fPreprocessorMap.DeleteAll();
212         for(int iSys=0;iSys<4;iSys++)
213                 if(fServer[iSys]) {
214                         fServer[iSys]->Close();
215                         delete fServer[iSys];
216                         fServer[iSys] = 0;
217                 }
218
219         if (fStatusEntry){
220                 delete fStatusEntry;
221                 fStatusEntry = 0;
222         }
223         
224         if (fMonitoringMutex) 
225         {
226                 delete fMonitoringMutex;
227                 fMonitoringMutex = 0;
228         }
229 }
230
231 //______________________________________________________________________________________________
232 void AliShuttle::RegisterPreprocessor(AliPreprocessor* preprocessor)
233 {
234         //
235         // Registers new AliPreprocessor.
236         // It uses GetName() for indentificator of the pre processor.
237         // The pre processor is registered it there isn't any other
238         // with the same identificator (GetName()).
239         //
240
241         const char* detName = preprocessor->GetName();
242         if(GetDetPos(detName) < 0)
243                 AliFatal(Form("********** !!!!! Invalid detector name: %s !!!!! **********", detName));
244
245         if (fPreprocessorMap.GetValue(detName)) {
246                 AliWarning(Form("AliPreprocessor %s is already registered!", detName));
247                 return;
248         }
249
250         fPreprocessorMap.Add(new TObjString(detName), preprocessor);
251 }
252 //______________________________________________________________________________________________
253 UInt_t AliShuttle::Store(const AliCDBPath& path, TObject* object,
254                 AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite)
255 {
256   // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for
257   // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored
258   // using this function. Use StoreReferenceData instead!
259   // It calls WriteToCDB function which perform actual storage
260
261         return WriteToCDB(fgkMainCDB, fgkLocalCDB, path, object,
262                                 metaData, validityStart, validityInfinite);
263
264 }
265
266 //______________________________________________________________________________________________
267 UInt_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData)
268 {
269   // Stores a CDB object in the storage for reference data. This objects will not be available during
270   // offline reconstrunction. Use this function for reference data only!
271   // It calls WriteToCDB function which perform actual storage
272
273         return WriteToCDB(fgkMainRefStorage, fgkLocalRefStorage, path, object, metaData);
274
275 }
276
277 //______________________________________________________________________________________________
278 UInt_t AliShuttle::WriteToCDB(const char* mainUri, const char* localUri,
279                         const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
280                         Int_t validityStart, Bool_t validityInfinite)
281 {
282   // write object into the CDB. Parameters are passed by Store and StoreReferenceData functions.
283   // The parameters are:
284   //   1) Uri of the main storage (Grid)
285   //   2) Uri of the backup storage (Local)
286   //   3) the object's path.
287   //   4) the object to be stored
288   //   5) the metaData to be associated with the object
289   //   6) the validity start run number w.r.t. the current run,
290   //      if the data is valid only for this run leave the default 0
291   //   7) specifies if the calibration data is valid for infinity (this means until updated),
292   //      typical for calibration runs, the default is kFALSE
293   //
294   // returns 0 if fail
295   //         1 if stored in main (Grid) storage
296   //         2 if stored in backup (Local) storage
297
298         const char* cdbType = (mainUri == fgkMainCDB) ? "CDB" : "Reference";
299
300         Int_t firstRun = GetCurrentRun() - validityStart;
301         if(firstRun < 0) {
302                 AliError("First valid run happens to be less than 0! Setting it to 0.");
303                 firstRun=0;
304         }
305
306         Int_t lastRun = -1;
307         if(validityInfinite) {
308                 lastRun = AliCDBRunRange::Infinity();
309         } else {
310                 lastRun = GetCurrentRun();
311         }
312
313         AliCDBId id(path, firstRun, lastRun, -1, -1);
314
315         if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){
316                 TObjString runUsed = Form("%d", GetCurrentRun());
317                 metaData->SetProperty("RunUsed(TObjString)",&runUsed);
318         }
319
320         UInt_t result = 0;
321
322         if (!(AliCDBManager::Instance()->GetStorage(mainUri))) {
323                 AliError(Form("WriteToCDB - Cannot activate main %s storage", cdbType));
324         } else {
325                 result = (UInt_t) AliCDBManager::Instance()->GetStorage(mainUri)
326                                         ->Put(object, id, metaData);
327         }
328
329         if(!result) {
330
331                 Log(fCurrentDetector,
332                         Form("WriteToCDB - Problem with main %s storage. Putting <%s> into backup storage",
333                                 cdbType, path.GetPath().Data()));
334
335                 // Set Grid version to current run number, to ease retrieval later
336                 id.SetVersion(GetCurrentRun());
337
338                 result = AliCDBManager::Instance()->GetStorage(localUri)
339                                         ->Put(object, id, metaData);
340
341                 if(result) {
342                         result = 2;
343                         fGridError = kTRUE;
344                 }else{
345                         Log(fCurrentDetector, "WriteToCDB - Can't store data!");
346                 }
347         }
348
349         return result;
350
351 }
352
353 //______________________________________________________________________________________________
354 AliShuttleStatus* AliShuttle::ReadShuttleStatus()
355 {
356 // Reads the AliShuttleStatus from the CDB
357
358         if (fStatusEntry){
359                 delete fStatusEntry;
360                 fStatusEntry = 0;
361         }
362
363         fStatusEntry = AliCDBManager::Instance()->GetStorage(AliShuttle::GetLocalCDB())
364                 ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
365
366         if (!fStatusEntry) return 0;
367         fStatusEntry->SetOwner(1);
368
369         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
370         if (!status) {
371                 AliError("Invalid object stored to CDB!");
372                 return 0;
373         }
374
375         return status;
376 }
377
378 //______________________________________________________________________________________________
379 Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status)
380 {
381 // writes the status for one subdetector
382
383         if (fStatusEntry){
384                 delete fStatusEntry;
385                 fStatusEntry = 0;
386         }
387
388         Int_t run = GetCurrentRun();
389
390         AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
391
392         fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
393         fStatusEntry->SetOwner(1);
394
395         UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
396
397         if (!result) {
398                 AliError(Form("WriteShuttleStatus for %s, run %d failed", fCurrentDetector.Data(), run));
399                 return kFALSE;
400         }
401
402         return kTRUE;
403 }
404
405 //______________________________________________________________________________________________
406 void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount)
407 {
408   // changes the AliShuttleStatus for the given detector and run to the given status
409
410         if (!fStatusEntry){
411                 AliError("UNEXPECTED: fStatusEntry empty");
412                 return;
413         }
414
415         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
416
417         if (!status){
418                 AliError("UNEXPECTED: status could not be read from current CDB entry");
419                 return;
420         }
421
422         TString actionStr = Form("UpdateShuttleStatus - %s: Changing state from %s to %s", 
423                                 fCurrentDetector.Data(),
424                                 status->GetStatusName(), 
425                                 status->GetStatusName(newStatus));
426         Log("SHUTTLE", actionStr);
427         SetLastAction(actionStr);
428
429         status->SetStatus(newStatus);
430         if (increaseCount) status->IncreaseCount();
431
432         AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
433 }
434 //______________________________________________________________________________________________
435 Bool_t AliShuttle::ContinueProcessing()
436 {
437 // this function reads the AliShuttleStatus information from CDB and
438 // checks if the processing should be continued
439 // if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
440
441         AliShuttleLogbookEntry::Status entryStatus =
442                 fLogbookEntry->GetDetectorStatus(fCurrentDetector);
443
444         if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
445                 Log("SHUTTLE", Form("ContinueProcessing - %s is %s",
446                                 fCurrentDetector.Data(),
447                                 fLogbookEntry->GetDetectorStatusName(entryStatus)));
448                 return kFALSE;
449         }
450
451         // if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state
452
453         // check if current run is first unprocessed run for current detector
454         if (fConfig->StrictRunOrder(fCurrentDetector) &&
455                 !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
456         {
457                 Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering but this is not the first unprocessed run!"));
458                 return kFALSE;
459         }
460
461         AliShuttleStatus* status = ReadShuttleStatus();
462         if (!status) {
463                 // first time
464                 Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
465                                 fCurrentDetector.Data()));
466                 status = new AliShuttleStatus(AliShuttleStatus::kStarted);
467                 return WriteShuttleStatus(status);
468         }
469
470         // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
471         // If it happens it may mean Logbook updating failed... let's do it now!
472         if (status->GetStatus() == AliShuttleStatus::kDone ||
473             status->GetStatus() == AliShuttleStatus::kFailed){
474                 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
475                                         fCurrentDetector.Data(),
476                                         status->GetStatusName(status->GetStatus())));
477                 UpdateShuttleLogbook(fCurrentDetector.Data(),
478                                         status->GetStatusName(status->GetStatus()));
479                 return kFALSE;
480         }
481
482         if (status->GetStatus() == AliShuttleStatus::kStoreFailed) {
483                 Log("SHUTTLE",
484                         Form("ContinueProcessing - %s: Grid storage of one or more objects failed. Trying again now",
485                                 fCurrentDetector.Data()));
486                 if(TryToStoreAgain()){
487                         Log(fCurrentDetector.Data(), "ContinueProcessing - All objects successfully stored into OCDB");
488                         UpdateShuttleStatus(AliShuttleStatus::kDone);
489                         UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE");
490                 } else {
491                         Log("SHUTTLE",
492                                 Form("ContinueProcessing - %s: Grid storage failed again",
493                                         fCurrentDetector.Data()));
494                 }
495                 return kFALSE;
496         }
497
498         // if we get here, there is a restart
499
500         // abort conditions
501         if (status->GetCount() >= fConfig->GetMaxRetries()) {
502                 Log("SHUTTLE",
503                         Form("ContinueProcessing - %s failed %d times in status %s - Updating Shuttle Logbook",
504                                 fCurrentDetector.Data(),
505                                 status->GetCount(), status->GetStatusName()));
506                 UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
507                 return kFALSE;
508         }
509
510         Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. Aborted before with %s. Retry number %d.",
511                         fCurrentDetector.Data(),
512                         status->GetStatusName(), status->GetCount()));
513
514         UpdateShuttleStatus(AliShuttleStatus::kStarted, kTRUE);
515
516         return kTRUE;
517 }
518
519 //______________________________________________________________________________________________
520 Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
521 {
522         //
523         // Makes data retrieval for all detectors in the configuration.
524         // entry: Shuttle logbook entry, contains run paramenters and status of detectors
525         // (Unprocessed, Inactive, Failed or Done).
526         // Returns kFALSE in case of error occured and kTRUE otherwise
527         //
528
529         if(!entry) return kFALSE;
530
531         fLogbookEntry = entry;
532
533         if(fLogbookEntry->IsDone()){
534                 Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook");
535                 UpdateShuttleLogbook("shuttle_done");
536                 fLogbookEntry = 0;
537                 return kTRUE;
538         }
539
540
541         AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^* \n",
542                                         GetCurrentRun()));
543
544         fLogbookEntry->Print("all");
545
546         // Initialization
547         Bool_t hasError = kFALSE;
548         for(Int_t iSys=0;iSys<3;iSys++) fFESCalled[iSys]=kFALSE;
549
550         AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
551         if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
552         AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
553         if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
554
555         // Loop on detectors in the configuration
556         TIter iter(fConfig->GetDetectors());
557         TObjString* aDetector = 0;
558
559         while ((aDetector = (TObjString*) iter.Next()))
560         {
561                 fCurrentDetector = aDetector->String();
562
563                 if (!fConfig->HostProcessDetector(fCurrentDetector)) continue;
564
565                 AliPreprocessor* aPreprocessor =
566                         dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
567                 if (!aPreprocessor)
568                 {
569                         Log("SHUTTLE",Form("Process - %s: no preprocessor registered. Skipping",
570                                                         fCurrentDetector.Data()));
571                         continue;
572                 }
573
574                 if (ContinueProcessing() == kFALSE) continue;
575
576                 AliInfo(Form("\n\n \t\t\t****** run %d - %s: START  ******",
577                                                 GetCurrentRun(), aDetector->GetName()));
578
579
580                 Int_t pid = fork();
581
582                 if (pid < 0)
583                 {
584                         Log("SHUTTLE", "ERROR: Forking failed");
585                 }
586                 else if (pid > 0)
587                 {
588                         // parent
589                         AliInfo(Form("In parent process of %d - %s: Starting monitoring",
590                                                         GetCurrentRun(), aDetector->GetName()));
591
592                         Long_t begin = time(0);
593
594                         int status; // to be used with waitpid, on purpose an int (not Int_t)!
595                         while (waitpid(pid, &status, WNOHANG) == 0)
596                         {
597                                 Long_t expiredTime = time(0) - begin;
598
599                                 if (expiredTime > fConfig->GetPPTimeOut())
600                                 {
601                                         Log("SHUTTLE", Form("Process time out. Run time: %d seconds. Killing...",
602                                                                 expiredTime));
603
604                                         kill(pid, 9);
605
606                                         hasError = kTRUE;
607
608                                         gSystem->Sleep(1000);
609                                 }
610                                 else
611                                 {
612                                         if (expiredTime % 60 == 0)
613                                         Log("SHUTTLE", Form("Checked process. Run time: %d seconds.",
614                                                                 expiredTime));
615                                         gSystem->Sleep(1000);
616                                 }
617                         }
618
619                         AliInfo(Form("In parent process of %d - %s: Client has terminated.",
620                                                                 GetCurrentRun(), aDetector->GetName()));
621
622                         if (WIFEXITED(status))
623                         {
624                                 Int_t returnCode = WEXITSTATUS(status);
625
626                                 Log("SHUTTLE", Form("The return code is %d", returnCode));
627
628                                 if (returnCode != 0)
629                                 hasError = kTRUE;
630                         }
631                 }
632                 else if (pid == 0)
633                 {
634                         // client
635                         AliInfo(Form("In client process of %d - %s", GetCurrentRun(), aDetector->GetName()));
636
637                         UInt_t result = ProcessCurrentDetector();
638
639                         Int_t returnCode = 0; // will be set to 1 in case of an error
640
641                         if (!result)
642                         {
643                                 returnCode = 1;
644                                 AliInfo(Form("\n \t\t\t****** run %d - %s: PREPROCESSOR ERROR ****** \n\n",
645                                                         GetCurrentRun(), aDetector->GetName()));
646                         }
647                         else if (result == 2)
648                         {
649                                 AliInfo(Form("\n \t\t\t****** run %d - %s: STORAGE ERROR ****** \n\n",
650                                                         GetCurrentRun(), aDetector->GetName()));
651                         } else
652                         {
653                                 AliInfo(Form("\n \t\t\t****** run %d - %s: DONE ****** \n\n",
654                                                         GetCurrentRun(), aDetector->GetName()));
655                         }
656
657                         if (result > 0)
658                         {
659                                 // Process successful: Update time_processed field in FES logbooks!
660                                 if (fFESCalled[kDAQ])
661                                 {
662                                         if (UpdateDAQTable() == kFALSE)
663                                         returnCode = 1;
664                                         fFESlist[kDAQ].Clear();
665                                 }
666                                 //if(fFESCalled[kDCS]) {
667                                 //  if (UpdateDCSTable(aDetector->GetName()) == kFALSE)
668                                 //    returnCode = 1;
669                                 //  fFESlist[kDCS].Clear();
670                                 //}
671                                 //if(fFESCalled[kHLT]) {
672                                 //  if (UpdateHLTTable(aDetector->GetName()) == kFALSE)
673                                 //    returnCode = 1;
674                                 //      fFESlist[kHLT].Clear();
675                                 //}
676                         }
677
678                         AliInfo(Form("Client process of %d - %s is exiting now with %d.",
679                                                         GetCurrentRun(), aDetector->GetName(), returnCode));
680
681                         // the client exits here
682                         gSystem->Exit(returnCode);
683
684                         AliError("We should never get here!!!");
685                 }
686         }
687
688         AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^* \n",
689                                                         GetCurrentRun()));
690
691         //check if shuttle is done for this run, if so update logbook
692         TObjArray checkEntryArray;
693         checkEntryArray.SetOwner(1);
694         TString whereClause = Form("where run=%d",GetCurrentRun());
695         if (QueryShuttleLogbook(whereClause.Data(), checkEntryArray)) {
696
697                 AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*>
698                                                         (checkEntryArray.At(0));
699
700                 if (checkEntry)
701                 {
702                         if (checkEntry->IsDone())
703                         {
704                                 Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook");
705                                 UpdateShuttleLogbook("shuttle_done");
706                         }
707                         else
708                         {
709                                 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
710                                 {
711                                         if (checkEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
712                                         {
713                                                 AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
714                                                                 checkEntry->GetRun(), GetDetName(iDet)));
715                                                 fFirstUnprocessed[iDet] = kFALSE;
716                                         }
717                                 }
718                         }
719                 }
720         }
721
722         fLogbookEntry = 0;
723
724         return hasError == kFALSE;
725 }
726
727 //______________________________________________________________________________________________
728 UInt_t AliShuttle::ProcessCurrentDetector()
729 {
730         //
731         // Makes data retrieval just for a specific detector (fCurrentDetector).
732         // Threre should be a configuration for this detector.
733
734         AliInfo(Form("Retrieving values for %s, run %d", fCurrentDetector.Data(), GetCurrentRun()));
735
736         UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
737
738         TString host(fConfig->GetDCSHost(fCurrentDetector));
739         Int_t port = fConfig->GetDCSPort(fCurrentDetector);
740
741         TIter iter(fConfig->GetDCSAliases(fCurrentDetector));
742         TObjString* anAlias;
743         TMap aliasMap;
744         aliasMap.SetOwner(1);
745
746         Bool_t aDCSError = kFALSE;
747         fGridError = kFALSE;
748
749         while ((anAlias = (TObjString*) iter.Next())) {
750                 TObjArray *valueSet = new TObjArray();
751                 valueSet->SetOwner(1);
752                 // TODO Test only... I've added a flag that allows to
753                 // exclude DCS archive DB query
754                 if(fgkProcessDCS){
755                         AliInfo("Querying DCS archive DB data...");
756                         aDCSError = (GetValueSet(host, port, anAlias->String(), valueSet) == 0);
757                 } else {
758                         AliInfo(Form("Skipping DCS processing. Port = %d",port));
759                         aDCSError = kFALSE;
760                 }
761                 if(!aDCSError) {
762                         aliasMap.Add(anAlias->Clone(), valueSet);
763                 }else{
764                         Log(fCurrentDetector, Form("ProcessCurrentDetector - Error while retrieving alias %s",
765                                         anAlias->GetName()));
766                         UpdateShuttleStatus(AliShuttleStatus::kDCSError, kTRUE);
767                         aliasMap.DeleteAll();
768                         return 0;
769                 }
770         }
771
772         // DCS Archive DB processing successful. Call Preprocessor!
773         UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
774
775         AliPreprocessor* aPreprocessor =
776                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
777
778         aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
779         UInt_t aPPResult = aPreprocessor->Process(&aliasMap);
780
781         UInt_t returnValue = 0;
782         if (aPPResult == 0) { // Preprocessor error
783                 UpdateShuttleStatus(AliShuttleStatus::kPPError);
784                 returnValue = 0;
785         } else if (fGridError == kFALSE) { // process and Grid storage ok!
786                 UpdateShuttleStatus(AliShuttleStatus::kDone);
787                 UpdateShuttleLogbook(fCurrentDetector, "DONE");
788                 Log(fCurrentDetector.Data(),
789                         "ProcessCurrentDetector - Preprocessor and Grid storage ended successfully");
790                 returnValue = 1;
791         } else { // Grid storage error (process ok, but object put in local storage)
792                 UpdateShuttleStatus(AliShuttleStatus::kStoreFailed);
793                 returnValue = 2;
794         }
795
796         aliasMap.DeleteAll();
797
798         return returnValue;
799 }
800
801 //______________________________________________________________________________________________
802 Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
803                 TObjArray& entries)
804 {
805 // Query DAQ's Shuttle logbook and fills detector status object.
806 // Call QueryRunParameters to query DAQ logbook for run parameters.
807
808         // check connection, in case connect
809         if(!Connect(3)) return kFALSE;
810
811         TString sqlQuery;
812         sqlQuery = Form("select * from logbook_shuttle %s order by run", whereClause);
813
814         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
815         if (!aResult) {
816                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
817                 return kFALSE;
818         }
819
820         if(aResult->GetRowCount() == 0) {
821                 if(sqlQuery.Contains("where shuttle_done=0")){
822                         Log("SHUTTLE", "QueryShuttleLogbook - All runs in Shuttle Logbook are already DONE");
823                         delete aResult;
824                         return kTRUE;
825                 } else {
826                         AliError("No entries in Shuttle Logbook match request");
827                         delete aResult;
828                         return kFALSE;
829                 }
830         }
831
832         // TODO Check field count!
833         const UInt_t nCols = 24;
834         if (aResult->GetFieldCount() != (Int_t) nCols) {
835                 AliError("Invalid SQL result field number!");
836                 delete aResult;
837                 return kFALSE;
838         }
839
840         entries.SetOwner(1);
841
842         TSQLRow* aRow;
843         while ((aRow = aResult->Next())) {
844                 TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
845                 Int_t run = runString.Atoi();
846
847                 AliShuttleLogbookEntry *entry = QueryRunParameters(run);
848                 if (!entry)
849                         continue;
850
851                 // loop on detectors
852                 for(UInt_t ii = 0; ii < nCols; ii++)
853                         entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii));
854
855                 entries.AddLast(entry);
856                 delete aRow;
857         }
858
859         if(sqlQuery.Contains("where shuttle_done=0"))
860                 Log("SHUTTLE", Form("QueryShuttleLogbook - Found %d unprocessed runs in Shuttle Logbook",
861                                                         entries.GetEntriesFast()));
862         delete aResult;
863         return kTRUE;
864 }
865
866 //______________________________________________________________________________________________
867 AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
868 {
869         //
870         // Retrieve run parameters written in the DAQ logbook and sets them into AliShuttleLogbookEntry object
871         //
872
873         // check connection, in case connect
874         if (!Connect(3))
875                 return 0;
876
877         TString sqlQuery;
878         sqlQuery.Form("select * from logbook where run=%d", run);
879
880         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
881         if (!aResult) {
882                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
883                 return 0;
884         }
885
886         if (aResult->GetRowCount() == 0) {
887                 Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run));
888                 delete aResult;
889                 return 0;
890         }
891
892         if (aResult->GetRowCount() > 1) {
893                 AliError(Form("More than one entry in DAQ Logbook for run %d. Skipping", run));
894                 delete aResult;
895                 return 0;
896         }
897
898         TSQLRow* aRow = aResult->Next();
899         if (!aRow)
900         {
901                 AliError(Form("Could not retrieve row for run %d. Skipping", run));
902                 delete aResult;
903                 return 0;
904         }
905
906         AliShuttleLogbookEntry* entry = new AliShuttleLogbookEntry(run);
907
908         for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
909                 entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
910
911         UInt_t startTime = entry->GetStartTime();
912         UInt_t endTime = entry->GetEndTime();
913
914         if (!startTime || !endTime || startTime > endTime) {
915                 Log("SHUTTLE",
916                         Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d",
917                                 run, startTime, endTime));
918                 delete entry;
919                 delete aRow;
920                 delete aResult;
921                 return 0;
922         }
923
924         delete aRow;
925         delete aResult;
926
927         return entry;
928 }
929
930 //______________________________________________________________________________________________
931 Bool_t AliShuttle::TryToStoreAgain()
932 {
933   // Called in case the detector failed to store the object in Grid OCDB
934   // It tries to store the object again, if it does not find more recent and overlapping objects
935   // Calls underlying TryToStoreAgain(const char*) function twice, for OCDB and Reference storage.
936
937         AliInfo("Trying to store OCDB data again...");
938         Bool_t resultCDB = TryToStoreAgain(fgkMainCDB);
939
940         AliInfo("Trying to store reference data again...");
941         Bool_t resultRef = TryToStoreAgain(fgkMainRefStorage);
942
943         return resultCDB && resultRef;
944 }
945
946 //______________________________________________________________________________________________
947 Bool_t AliShuttle::TryToStoreAgain(TString& gridURI)
948 {
949   // Called by TryToStoreAgain(), performs actual storage retry
950
951         TObjArray* gridIds=0;
952
953         Bool_t result = kTRUE;
954
955         const char* type = 0;
956         TString backupURI;
957         if(gridURI == fgkMainCDB) {
958                 type = "OCDB";
959                 backupURI = fgkLocalCDB;
960         } else if(gridURI == fgkMainRefStorage) {
961                 type = "reference";
962                 backupURI = fgkLocalRefStorage;
963         } else {
964                 AliError(Form("Invalid storage URI: %s", gridURI.Data()));
965                 return kFALSE;
966         }
967
968         AliCDBManager* man = AliCDBManager::Instance();
969
970         AliCDBStorage *gridSto = man->GetStorage(gridURI);
971         if(!gridSto) {
972                 Log(fCurrentDetector.Data(),
973                         Form("TryToStoreAgain - cannot activate main %s storage", type));
974                 return kFALSE;
975         }
976
977         gridIds = gridSto->GetQueryCDBList();
978
979         // get objects previously stored in local CDB
980         AliCDBStorage *backupSto = man->GetStorage(backupURI);
981         AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*");
982         // Local objects were stored with current run as Grid version!
983         TList* localEntries = backupSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun());
984         localEntries->SetOwner(1);
985
986         // loop on local stored objects
987         TIter localIter(localEntries);
988         AliCDBEntry *aLocEntry = 0;
989         while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){
990                 aLocEntry->SetOwner(1);
991                 AliCDBId aLocId = aLocEntry->GetId();
992                 aLocEntry->SetVersion(-1);
993                 aLocEntry->SetSubVersion(-1);
994
995                 // loop on Grid valid Id's
996                 Bool_t store = kTRUE;
997                 TIter gridIter(gridIds);
998                 AliCDBId* aGridId = 0;
999                 while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
1000                         // If local object is valid up to infinity we store it only if it is
1001                         // the first unprocessed run!
1002                         if (aLocId.GetLastRun() == AliCDBRunRange::Infinity())
1003                         {
1004                                 if (!fFirstUnprocessed[GetDetPos(fCurrentDetector)])
1005                                 {
1006                                         Log(fCurrentDetector.Data(),
1007                                                 ("TryToStoreAgain - This object has validity infinite but there are previous unprocessed runs!"));
1008                                         continue;
1009                                 } else {
1010                                         break;
1011                                 }
1012                         }
1013                         if(aGridId->GetPath() != aLocId.GetPath()) continue;
1014                         // skip all objects valid up to infinity
1015                         if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
1016                         // if we get here, it means there's already some more recent object stored on Grid!
1017                         store = kFALSE;
1018                         break;
1019                 }
1020
1021                 if(!store){
1022                         Log(fCurrentDetector.Data(),
1023                                 Form("TryToStoreAgain - A more recent object already exists in %s storage: <%s>",
1024                                         type, aGridId->ToString().Data()));
1025                         // removing local filename...
1026                         // TODO maybe it's better not to remove it, it was not copied to the Grid!
1027                         TString filename;
1028                         backupSto->IdToFilename(aLocId, filename);
1029                         AliInfo(Form("Removing local file %s", filename.Data()));
1030                         gSystem->Exec(Form("rm %s",filename.Data()));
1031                         continue;
1032                 }
1033
1034                 // If we get here, the file can be stored!
1035                 Bool_t storeOk = gridSto->Put(aLocEntry);
1036                 if(storeOk){
1037                         Log(fCurrentDetector.Data(),
1038                                 Form("TryToStoreAgain - Object <%s> successfully put into %s storage",
1039                                         aLocId.ToString().Data(), type));
1040
1041                         // removing local filename...
1042                         TString filename;
1043                         backupSto->IdToFilename(aLocId, filename);
1044                         AliInfo(Form("Removing local file %s", filename.Data()));
1045                         gSystem->Exec(Form("rm %s", filename.Data()));
1046                         continue;
1047                 } else  {
1048                         Log(fCurrentDetector.Data(),
1049                                 Form("TryToStoreAgain - Grid %s storage of object <%s> failed again",
1050                                         type, aLocId.ToString().Data()));
1051                         result = kFALSE;
1052                 }
1053         }
1054         localEntries->Clear();
1055
1056         return result;
1057 }
1058
1059 //______________________________________________________________________________________________
1060 Bool_t AliShuttle::GetValueSet(const char* host, Int_t port, const char* alias,
1061                                 TObjArray* valueSet)
1062 {
1063 // Retrieve all "alias" data points from the DCS server
1064 // host, port: TSocket connection parameters
1065 // alias: name of the alias
1066 // valueSet: array of retrieved AliDCSValue's
1067
1068         AliDCSClient client(host, port, fTimeout, fRetries);
1069         if (!client.IsConnected()) {
1070                 return kFALSE;
1071         }
1072
1073         Int_t result = client.GetAliasValues(alias,
1074                 GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1075
1076         if (result < 0) {
1077                 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get '%s'! Reason: %s",
1078                         alias, AliDCSClient::GetErrorString(result)));
1079
1080                 if (result == AliDCSClient::fgkServerError) {
1081                         Log(fCurrentDetector.Data(), Form("GetValueSet - Server error: %s",
1082                                 client.GetServerError().Data()));
1083                 }
1084
1085                 return kFALSE;
1086         }
1087
1088         return kTRUE;
1089 }
1090
1091 //______________________________________________________________________________________________
1092 const char* AliShuttle::GetFile(Int_t system, const char* detector,
1093                 const char* id, const char* source)
1094 {
1095 // Get calibration file from file exchange servers
1096 // calls specific getter according to system index (kDAQ, kDCS, kHLT)
1097
1098         switch(system){
1099                 case kDAQ:
1100                         return GetDAQFileName(detector, id, source);
1101                         break;
1102                 case kDCS:
1103                         return GetDCSFileName(detector, id, source);
1104                         break;
1105                 case kHLT:
1106                         return GetHLTFileName(detector, id, source);
1107                         break;
1108                 default:
1109                         AliError(Form("No valid system index: %d",system));
1110         }
1111
1112         return 0;
1113 }
1114
1115 //______________________________________________________________________________________________
1116 TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
1117 {
1118 // Get sources producing the condition file Id from file exchange servers
1119 // calls specific getter according to system index (kDAQ, kDCS, kHLT)
1120
1121         switch(system){
1122                 case kDAQ:
1123                         return GetDAQFileSources(detector, id);
1124                         break;
1125                 case kDCS:
1126                         return GetDCSFileSources(detector, id);
1127                         break;
1128                 case kHLT:
1129                         return GetHLTFileSources(detector, id);
1130                         break;
1131                 default:
1132                         AliError(Form("No valid system index: %d",system));
1133         }
1134
1135         return NULL;
1136 }
1137
1138 //______________________________________________________________________________________________
1139 Bool_t AliShuttle::Connect(Int_t system)
1140 {
1141 // Connect to MySQL Server of the system's FES logbook
1142 // DAQ Logbook, Shuttle Logbook and DAQ FES Logbook are on the same host
1143
1144         // check connection: if already connected return
1145         if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
1146
1147         TString lbHost, lbUser, lbPass;
1148
1149         if (system < 3) // FES logbook servers
1150         {
1151                 lbHost = Form("mysql://%s", fConfig->GetFESlbHost(system));
1152                 lbUser = fConfig->GetFESlbUser(system);
1153                 lbPass = fConfig->GetFESlbPass(system);
1154         } else { // Run & Shuttle logbook servers
1155         // TODO Will the Shuttle logbook server be the same as the Run logbook server ???
1156                 lbHost = Form("mysql://%s", fConfig->GetDAQlbHost());
1157                 lbUser = fConfig->GetDAQlbUser();
1158                 lbPass = fConfig->GetDAQlbPass();
1159         }
1160
1161         fServer[system] = TSQLServer::Connect(lbHost.Data(), lbUser.Data(), lbPass.Data());
1162         if (!fServer[system] || !fServer[system]->IsConnected()) {
1163                 if(system < 3)
1164                 {
1165                 AliError(Form("Can't establish connection to FES logbook for %s",
1166                                         AliShuttleInterface::GetSystemName(system)));
1167                 } else {
1168                 AliError("Can't establish connection to Run logbook.");
1169                 }
1170                 if(fServer[system]) delete fServer[system];
1171                 return kFALSE;
1172         }
1173
1174         // Get tables
1175         // TODO in the configuration should the table name be there too?
1176         TSQLResult* aResult=0;
1177         switch(system){
1178                 case kDAQ:
1179                         aResult = fServer[kDAQ]->GetTables("REFSYSLOG");
1180                         break;
1181                 case kDCS:
1182                         //aResult = fServer[kDCS]->GetTables("REFSYSLOG");
1183                         break;
1184                 case kHLT:
1185                         //aResult = fServer[kHLT]->GetTables("REFSYSLOG");
1186                         break;
1187                 default:
1188                         aResult = fServer[3]->GetTables("REFSYSLOG");
1189                         break;
1190         }
1191
1192         delete aResult;
1193         return kTRUE;
1194 }
1195
1196 //______________________________________________________________________________________________
1197 const char* AliShuttle::GetDAQFileName(const char* detector, const char* id, const char* source)
1198 {
1199 // Retrieves a file from the DAQ FES.
1200 // First queris the DAQ logbook_fs for the DAQ file name, using the run, detector, id and source info
1201 // then calls RetrieveDAQFile(DAQfilename) for actual copy to local disk
1202 // run: current run being processed (given by Logbook entry fLogbookEntry)
1203 // detector: the Preprocessor name
1204 // id: provided as a parameter by the Preprocessor
1205 // source: provided by the Preprocessor through GetFileSources function
1206
1207         // check connection, in case connect
1208         if (!Connect(kDAQ))
1209         {
1210                 Log(detector, "GetDAQFileName - Couldn't connect to DAQ Logbook");
1211                 return 0;
1212         }
1213
1214         // Query preparation
1215         TString sqlQueryStart = "select filePath from logbook_fs where";
1216         TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\"",
1217                                 GetCurrentRun(), detector, id, source);
1218         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1219
1220         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1221
1222         // Query execution
1223         TSQLResult* aResult = 0;
1224         aResult = dynamic_cast<TSQLResult*> (fServer[kDAQ]->Query(sqlQuery));
1225         if (!aResult) {
1226                 Log(detector, Form("GetDAQFileName - Can't execute SQL query for: id = %s, source = %s",
1227                                 id, source));
1228                 return 0;
1229         }
1230
1231         if(aResult->GetRowCount() == 0)
1232         {
1233                 Log(detector,
1234                         Form("GetDAQFileName - No entry in FES table for: id = %s, source = %s",
1235                                 id, source));
1236                 delete aResult;
1237                 return 0;
1238         }
1239
1240         if (aResult->GetRowCount() > 1) {
1241                 Log(detector,
1242                         Form("GetDAQFileName - More than one entry in FES table for: id = %s, source = %s",
1243                                 id, source));
1244                 delete aResult;
1245                 return 0;
1246         }
1247
1248         TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
1249
1250         if (!aRow){
1251                 Log(detector, Form("GetDAQFileName - Empty set result from query: id = %s, source = %s",
1252                                 id, source));
1253                 delete aResult;
1254                 return 0;
1255         }
1256
1257         TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
1258
1259         delete aResult;
1260         delete aRow;
1261
1262         AliDebug(2, Form("filePath = %s",filePath.Data()));
1263
1264         // retrieved file is renamed to make it unique
1265         TString localFileName = Form("%s_%d_%s_%s.shuttle",
1266                                         detector, GetCurrentRun(), id, source);
1267
1268         // file retrieval from DAQ FES
1269         Bool_t result = RetrieveDAQFile(filePath.Data(), localFileName.Data());
1270         if(!result) {
1271                 Log(detector, Form("GetDAQFileName - Copy of file %s from DAQ FES failed", filePath.Data()));
1272                 return 0;
1273         } else {
1274                 AliInfo(Form("File %s copied from DAQ FES into %s/%s",
1275                         filePath.Data(), fgkShuttleTempDir, localFileName.Data()));
1276         }
1277
1278
1279         fFESCalled[kDAQ]=kTRUE;
1280         TObjString *fileParams = new TObjString(Form("%s_!?!_%s", id, source));
1281         fFESlist[kDAQ].Add(fileParams);
1282
1283         return localFileName.Data();
1284
1285 }
1286
1287 //______________________________________________________________________________________________
1288 Bool_t AliShuttle::RetrieveDAQFile(const char* daqFileName, const char* localFileName)
1289 {
1290
1291         // check temp directory: trying to cd to temp; if it does not exist, create it
1292         AliDebug(2, Form("Copy file %s from DAQ FES into folder %s and rename it as %s",
1293                         daqFileName,fgkShuttleTempDir, localFileName));
1294
1295         void* dir = gSystem->OpenDirectory(fgkShuttleTempDir);
1296         if (dir == NULL) {
1297                 if (gSystem->mkdir(fgkShuttleTempDir, kTRUE)) {
1298                         AliError(Form("Can't open directory <%s>", fgkShuttleTempDir));
1299                         return kFALSE;
1300                 }
1301
1302         } else {
1303                 gSystem->FreeDirectory(dir);
1304         }
1305
1306         TString baseDAQFESFolder = "DAQ";
1307         TString command = Form("scp %s@%s:%s/%s %s/%s",
1308                 fConfig->GetFESUser(kDAQ),
1309                 fConfig->GetFESHost(kDAQ),
1310                 baseDAQFESFolder.Data(),
1311                 daqFileName,
1312                 fgkShuttleTempDir,
1313                 localFileName);
1314
1315         AliDebug(2, Form("%s",command.Data()));
1316
1317         UInt_t nRetries = 0;
1318         UInt_t maxRetries = 3;
1319
1320         // copy!! if successful TSystem::Exec returns 0
1321         while(nRetries++ < maxRetries) {
1322                 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
1323                 if(gSystem->Exec(command.Data()) == 0) return kTRUE;
1324         }
1325
1326         return kFALSE;
1327
1328 }
1329
1330 //______________________________________________________________________________________________
1331 TList* AliShuttle::GetDAQFileSources(const char* detector, const char* id)
1332 {
1333 // Retrieves a file from the DCS FES.
1334
1335         // check connection, in case connect
1336         if(!Connect(kDAQ)){
1337                 Log(detector, "GetDAQFileSources - Couldn't connect to DAQ Logbook");
1338                 return 0;
1339         }
1340
1341         // Query preparation
1342         TString sqlQueryStart = "select DAQsource from logbook_fs where";
1343         TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1344                                 GetCurrentRun(), detector, id);
1345         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1346
1347         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1348
1349         // Query execution
1350         TSQLResult* aResult;
1351         aResult = fServer[kDAQ]->Query(sqlQuery);
1352         if (!aResult) {
1353                 Log(detector, Form("GetDAQFileSources - Can't execute SQL query for id: %s", id));
1354                 return 0;
1355         }
1356
1357         if (aResult->GetRowCount() == 0) {
1358                 Log(detector,
1359                         Form("GetDAQFileSources - No entry in FES table for id: %s", id));
1360                 delete aResult;
1361                 return 0;
1362         }
1363
1364         TSQLRow* aRow;
1365         TList *list = new TList();
1366         list->SetOwner(1);
1367
1368         while((aRow = aResult->Next())){
1369
1370                 TString daqSource(aRow->GetField(0), aRow->GetFieldLength(0));
1371                 AliDebug(2, Form("daqSource = %s", daqSource.Data()));
1372                 list->Add(new TObjString(daqSource));
1373                 delete aRow;
1374         }
1375         delete aResult;
1376
1377         return list;
1378
1379 }
1380
1381 //______________________________________________________________________________________________
1382 const char* AliShuttle::GetDCSFileName(const char* /*detector*/, const char* /*id*/, const char* /*source*/){
1383 // Retrieves a file from the DCS FES.
1384
1385 return "You're in DCS";
1386
1387 }
1388
1389 //______________________________________________________________________________________________
1390 TList* AliShuttle::GetDCSFileSources(const char* /*detector*/, const char* /*id*/){
1391 // Retrieves a file from the DCS FES.
1392
1393 return NULL;
1394
1395 }
1396
1397 //______________________________________________________________________________________________
1398 const char* AliShuttle::GetHLTFileName(const char* /*detector*/, const char* /*id*/, const char* /*source*/){
1399 // Retrieves a file from the HLT FES.
1400
1401 return "You're in HLT";
1402
1403 }
1404
1405 //______________________________________________________________________________________________
1406 TList* AliShuttle::GetHLTFileSources(const char* /*detector*/, const char* /*id*/){
1407 // Retrieves a file from the HLT FES.
1408
1409 return NULL;
1410
1411 }
1412
1413 //______________________________________________________________________________________________
1414 Bool_t AliShuttle::UpdateDAQTable()
1415 {
1416 // Update DAQ table filling time_processed field in all rows corresponding to current run and detector
1417
1418         // check connection, in case connect
1419         if(!Connect(kDAQ)){
1420                 Log(fCurrentDetector, "UpdateDAQTable - Couldn't connect to DAQ Logbook");
1421                 return kFALSE;
1422         }
1423
1424         TTimeStamp now; // now
1425
1426         // Loop on FES list entries
1427         TIter iter(&fFESlist[kDAQ]);
1428         TObjString *aFESentry=0;
1429         while((aFESentry = dynamic_cast<TObjString*> (iter.Next()))){
1430                 TString aFESentrystr = aFESentry->String();
1431                 TObjArray *aFESarray = aFESentrystr.Tokenize("_!?!_");
1432                 if(!aFESarray || aFESarray->GetEntries() != 2 ) {
1433                         Log(fCurrentDetector, Form("UpdateDAQTable - error updating FES entry. Check string: <%s>",
1434                                 aFESentrystr.Data()));
1435                         if(aFESarray) delete aFESarray;
1436                         return kFALSE;
1437                 }
1438                 const char* fileId = ((TObjString*) aFESarray->At(0))->GetName();
1439                 const char* daqSource = ((TObjString*) aFESarray->At(1))->GetName();
1440                 TString whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
1441                         GetCurrentRun(), fCurrentDetector.Data(), fileId, daqSource);
1442
1443                 delete aFESarray;
1444
1445                 TString sqlQuery = Form("update logbook_fs set time_processed=%d %s", now.GetSec(), whereClause.Data());
1446
1447                 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1448
1449                 // Query execution
1450                 TSQLResult* aResult;
1451                 aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
1452                 if (!aResult) {
1453                         Log(fCurrentDetector, Form("UpdateDAQTable - Can't execute SQL query <%s>", sqlQuery.Data()));
1454                         return kFALSE;
1455                 }
1456                 delete aResult;
1457         }
1458
1459         return kTRUE;
1460 }
1461
1462
1463 //______________________________________________________________________________________________
1464 Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
1465 {
1466 // Update Shuttle logbook filling detector or shuttle_done column
1467 // ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
1468
1469         // check connection, in case connect
1470         if(!Connect(3)){
1471                 Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook.");
1472                 return kFALSE;
1473         }
1474
1475         TString detName(detector);
1476         TString setClause;
1477         if(detName == "shuttle_done") {
1478                 setClause = "set shuttle_done=1";
1479         } else {
1480                 TString statusStr(status);
1481                 if(statusStr.Contains("done", TString::kIgnoreCase) ||
1482                    statusStr.Contains("failed", TString::kIgnoreCase)){
1483                         setClause = Form("set %s=\"%s\"", detector, status);
1484                 } else {
1485                         Log("SHUTTLE",
1486                                 Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s",
1487                                         status, detector));
1488                         return kFALSE;
1489                 }
1490         }
1491
1492         TString whereClause = Form("where run=%d", GetCurrentRun());
1493
1494         TString sqlQuery = Form("update logbook_shuttle %s %s",
1495                                         setClause.Data(), whereClause.Data());
1496
1497         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1498
1499         // Query execution
1500         TSQLResult* aResult;
1501         aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
1502         if (!aResult) {
1503                 Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data()));
1504                 return kFALSE;
1505         }
1506         delete aResult;
1507
1508         return kTRUE;
1509 }
1510
1511 //______________________________________________________________________________________________
1512 Int_t AliShuttle::GetCurrentRun() const
1513 {
1514 // Get current run from logbook entry
1515
1516         return fLogbookEntry ? fLogbookEntry->GetRun() : -1;
1517 }
1518
1519 //______________________________________________________________________________________________
1520 UInt_t AliShuttle::GetCurrentStartTime() const
1521 {
1522 // get current start time
1523
1524         return fLogbookEntry ? fLogbookEntry->GetStartTime() : 0;
1525 }
1526
1527 //______________________________________________________________________________________________
1528 UInt_t AliShuttle::GetCurrentEndTime() const
1529 {
1530 // get current end time from logbook entry
1531
1532         return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
1533 }
1534
1535 //______________________________________________________________________________________________
1536 void AliShuttle::Log(const char* detector, const char* message)
1537 {
1538 // Fill log string with a message
1539
1540         void* dir = gSystem->OpenDirectory(fgkShuttleLogDir);
1541         if (dir == NULL) {
1542                 if (gSystem->mkdir(fgkShuttleLogDir, kTRUE)) {
1543                         AliError(Form("Can't open directory <%s>", fgkShuttleTempDir));
1544                         return;
1545                 }
1546
1547         } else {
1548                 gSystem->FreeDirectory(dir);
1549         }
1550
1551         TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
1552         if(GetCurrentRun()>=0 ) toLog += Form("run %d - ", GetCurrentRun());
1553         toLog += Form("%s", message);
1554
1555         AliInfo(toLog.Data());
1556
1557         TString fileName;
1558         fileName.Form("%s/%s.log", fgkShuttleLogDir, detector);
1559         gSystem->ExpandPathName(fileName);
1560
1561         ofstream logFile;
1562         logFile.open(fileName, ofstream::out | ofstream::app);
1563
1564         if (!logFile.is_open()) {
1565                 AliError(Form("Could not open file %s", fileName.Data()));
1566                 return;
1567         }
1568
1569         logFile << toLog.Data() << "\n";
1570
1571         logFile.close();
1572 }
1573
1574 //______________________________________________________________________________________________
1575 Bool_t AliShuttle::Collect(Int_t run)
1576 {
1577 //
1578 // Collects conditions data for all UNPROCESSED run written to DAQ LogBook in case of run = -1 (default)
1579 // If a dedicated run is given this run is processed
1580 //
1581 // In operational mode, this is the Shuttle function triggered by the EOR signal.
1582 //
1583
1584         if (run == -1)
1585                 Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs");
1586         else
1587                 Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run));
1588
1589         SetLastAction("Starting");
1590
1591         TString whereClause("where shuttle_done=0");
1592         if (run != -1)
1593                 whereClause += Form(" and run=%d", run);
1594
1595         TObjArray shuttleLogbookEntries;
1596         if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries))
1597         {
1598                 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
1599                 return kFALSE;
1600         }
1601
1602         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1603                 fFirstUnprocessed[iDet] = kTRUE;
1604
1605         if (run != 1)
1606         {
1607                 // query Shuttle logbook for earlier runs, check if some detectors are unprocessed,
1608                 // flag them into fFirstUnprocessed array
1609                 TString whereClause(Form("where shuttle_done=0 and run < %d", run));
1610                 TObjArray tmpLogbookEntries;
1611                 if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries))
1612                 {
1613                         Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
1614                         return kFALSE;
1615                 }
1616
1617                 TIter iter(&tmpLogbookEntries);
1618                 AliShuttleLogbookEntry* anEntry = 0;
1619                 while ((anEntry = dynamic_cast<AliShuttleLogbookEntry*> (iter.Next())))
1620                 {
1621                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1622                         {
1623                                 if (anEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
1624                                 {
1625                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
1626                                                         anEntry->GetRun(), GetDetName(iDet)));
1627                                         fFirstUnprocessed[iDet] = kFALSE;
1628                                 }
1629                         }
1630
1631                 }
1632
1633         }
1634
1635         if (!RetrieveConditionsData(shuttleLogbookEntries))
1636         {
1637                 Log("SHUTTLE", "Collect - Process of at least one run failed");
1638                 return kFALSE;
1639         }
1640
1641         return kTRUE;
1642 }
1643
1644 //______________________________________________________________________________________________
1645 Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries)
1646 {
1647 // Retrieve conditions data for all runs that aren't processed yet
1648
1649         Bool_t hasError = kFALSE;
1650
1651         TIter iter(&dateEntries);
1652         AliShuttleLogbookEntry* anEntry;
1653
1654         while ((anEntry = (AliShuttleLogbookEntry*) iter.Next())){
1655                 if (!Process(anEntry)){
1656                         hasError = kTRUE;
1657                 }
1658         }
1659
1660         return hasError == kFALSE;
1661 }
1662
1663 //______________________________________________________________________________________________
1664 ULong_t AliShuttle::GetTimeOfLastAction() const
1665 {
1666         ULong_t tmp;
1667         
1668         fMonitoringMutex->Lock();
1669
1670         tmp = fLastActionTime;
1671         
1672         fMonitoringMutex->UnLock();
1673         
1674         return tmp;
1675 }
1676
1677 //______________________________________________________________________________________________
1678 const TString AliShuttle::GetLastAction() const
1679 {
1680         // returns a string description of the last action
1681
1682         TString tmp;
1683         
1684         fMonitoringMutex->Lock();
1685         
1686         tmp = fLastAction;
1687         
1688         fMonitoringMutex->UnLock();
1689
1690         return tmp;     
1691 }
1692
1693 //______________________________________________________________________________________________
1694 void AliShuttle::SetLastAction(const char* action)
1695 {
1696         // updates the monitoring variables
1697         
1698         fMonitoringMutex->Lock();
1699         
1700         fLastAction = action;
1701         fLastActionTime = time(0);
1702         
1703         fMonitoringMutex->UnLock();
1704 }
1705
1706 //______________________________________________________________________________________________
1707 const char* AliShuttle::GetRunParameter(const char* param)
1708 {
1709 // returns run parameter read from DAQ logbook
1710
1711         if(!fLogbookEntry) {
1712                 AliError("No logbook entry!");
1713                 return 0;
1714         }
1715
1716         return fLogbookEntry->GetRunParameter(param);
1717 }