]> git.uio.no Git - u/mrichter/AliRoot.git/blob - SHUTTLE/AliShuttle.cxx
update (alberto):
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttle.cxx
1 /**************************************************************************
2  * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
3  *                                                                        *
4  * Author: The ALICE Off-line Project.                                    *
5  * Contributors are mentioned in the code where appropriate.              *
6  *                                                                        *
7  * Permission to use, copy, modify and distribute this software and its   *
8  * documentation strictly for non-commercial purposes is hereby granted   *
9  * without fee, provided that the above copyright notice appears in all   *
10  * copies and that both the copyright notice and this permission notice   *
11  * appear in the supporting documentation. The authors make no claims     *
12  * about the suitability of this software for any purpose. It is          *
13  * provided "as is" without express or implied warranty.                  *
14  **************************************************************************/
15
16 /*
17 $Log$
18 Revision 1.20  2006/11/16 16:16:48  jgrosseo
19 introducing strict run ordering flag
20 removed giving preprocessor name to preprocessor, they have to know their name themselves ;-)
21
22 Revision 1.19  2006/11/06 14:23:04  jgrosseo
23 major update (Alberto)
24 o) reading of run parameters from the logbook
25 o) online offline naming conversion
26 o) standalone DCSclient package
27
28 Revision 1.18  2006/10/20 15:22:59  jgrosseo
29 o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
30 o) Merging Collect, CollectAll, CollectNew function
31 o) Removing implementation of empty copy constructors (declaration still there!)
32
33 Revision 1.17  2006/10/05 16:20:55  jgrosseo
34 adapting to new CDB classes
35
36 Revision 1.16  2006/10/05 15:46:26  jgrosseo
37 applying to the new interface
38
39 Revision 1.15  2006/10/02 16:38:39  jgrosseo
40 update (alberto):
41 fixed memory leaks
42 storing of objects that failed to be stored to the grid before
43 interfacing of shuttle status table in daq system
44
45 Revision 1.14  2006/08/29 09:16:05  jgrosseo
46 small update
47
48 Revision 1.13  2006/08/15 10:50:00  jgrosseo
49 effc++ corrections (alberto)
50
51 Revision 1.12  2006/08/08 14:19:29  jgrosseo
52 Update to shuttle classes (Alberto)
53
54 - Possibility to set the full object's path in the Preprocessor's and
55 Shuttle's  Store functions
56 - Possibility to extend the object's run validity in the same classes
57 ("startValidity" and "validityInfinite" parameters)
58 - Implementation of the StoreReferenceData function to store reference
59 data in a dedicated CDB storage.
60
61 Revision 1.11  2006/07/21 07:37:20  jgrosseo
62 last run is stored after each run
63
64 Revision 1.10  2006/07/20 09:54:40  jgrosseo
65 introducing status management: The processing per subdetector is divided into several steps,
66 after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
67 can keep track of the number of failures and skips further processing after a certain threshold is
68 exceeded. These thresholds can be configured in LDAP.
69
70 Revision 1.9  2006/07/19 10:09:55  jgrosseo
71 new configuration, accesst to DAQ FES (Alberto)
72
73 Revision 1.8  2006/07/11 12:44:36  jgrosseo
74 adding parameters for extended validity range of data produced by preprocessor
75
76 Revision 1.7  2006/07/10 14:37:09  jgrosseo
77 small fix + todo comment
78
79 Revision 1.6  2006/07/10 13:01:41  jgrosseo
80 enhanced storing of last sucessfully processed run (alberto)
81
82 Revision 1.5  2006/07/04 14:59:57  jgrosseo
83 revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
84
85 Revision 1.4  2006/06/12 09:11:16  jgrosseo
86 coding conventions (Alberto)
87
88 Revision 1.3  2006/06/06 14:26:40  jgrosseo
89 o) removed files that were moved to STEER
90 o) shuttle updated to follow the new interface (Alberto)
91
92 Revision 1.2  2006/03/07 07:52:34  hristov
93 New version (B.Yordanov)
94
95 Revision 1.6  2005/11/19 17:19:14  byordano
96 RetrieveDATEEntries and RetrieveConditionsData added
97
98 Revision 1.5  2005/11/19 11:09:27  byordano
99 AliShuttle declaration added
100
101 Revision 1.4  2005/11/17 17:47:34  byordano
102 TList changed to TObjArray
103
104 Revision 1.3  2005/11/17 14:43:23  byordano
105 import to local CVS
106
107 Revision 1.1.1.1  2005/10/28 07:33:58  hristov
108 Initial import as subdirectory in AliRoot
109
110 Revision 1.2  2005/09/13 08:41:15  byordano
111 default startTime endTime added
112
113 Revision 1.4  2005/08/30 09:13:02  byordano
114 some docs added
115
116 Revision 1.3  2005/08/29 21:15:47  byordano
117 some docs added
118
119 */
120
121 //
// This class is the main manager for AliShuttle.
// It organizes the data retrieval from DCS and calls the
// interface methods of AliPreprocessor.
// For every detector in the configuration (see AliShuttleConfig),
// data for its set of aliases is retrieved. If there is a registered
// AliPreprocessor for this detector then it will be used
// according to the schema (see AliPreprocessor).
// If there isn't a registered AliPreprocessor then the retrieved
// data is stored automatically to the underlying AliCDBStorage.
// The alias name is used for detSpec.
132 //
133
134 #include "AliShuttle.h"
135
136 #include "AliCDBManager.h"
137 #include "AliCDBStorage.h"
138 #include "AliCDBId.h"
139 #include "AliCDBRunRange.h"
140 #include "AliCDBPath.h"
141 #include "AliCDBEntry.h"
142 #include "AliShuttleConfig.h"
143 #include "DCSClient/AliDCSClient.h"
144 #include "AliLog.h"
145 #include "AliPreprocessor.h"
146 #include "AliShuttleStatus.h"
147 #include "AliShuttleLogbookEntry.h"
148
149 #include <TSystem.h>
150 #include <TObject.h>
151 #include <TString.h>
152 #include <TTimeStamp.h>
153 #include <TObjString.h>
154 #include <TSQLServer.h>
155 #include <TSQLResult.h>
156 #include <TSQLRow.h>
157 #include <TMutex.h>
158
159 #include <fstream>
160
161 #include <sys/types.h>
162 #include <sys/wait.h>
163
164 ClassImp(AliShuttle)
165
166 TString AliShuttle::fgkMainCDB("alien://folder=ShuttleCDB");
167 TString AliShuttle::fgkLocalCDB("local://LocalShuttleCDB");
168 TString AliShuttle::fgkMainRefStorage("alien://folder=ShuttleReference");
169 TString AliShuttle::fgkLocalRefStorage("local://LocalReferenceStorage");
170
171 Bool_t AliShuttle::fgkProcessDCS(kTRUE); 
172
173 const char* AliShuttle::fgkShuttleTempDir = gSystem->ExpandPathName("$ALICE_ROOT/SHUTTLE/temp");
174 const char* AliShuttle::fgkShuttleLogDir = gSystem->ExpandPathName("$ALICE_ROOT/SHUTTLE/log");
175
176 //______________________________________________________________________________________________
177 AliShuttle::AliShuttle(const AliShuttleConfig* config,
178                 UInt_t timeout, Int_t retries):
179 fConfig(config),
180 fTimeout(timeout), fRetries(retries),
181 fPreprocessorMap(),
182 fLogbookEntry(0),
183 fCurrentDetector(),
184 fStatusEntry(0),
185 fGridError(kFALSE),
186 fMonitoringMutex(0),
187 fLastActionTime(0),
188 fLastAction()
189 {
190         //
191         // config: AliShuttleConfig used
192         // timeout: timeout used for AliDCSClient connection
193         // retries: the number of retries in case of connection error.
194         //
195
196         if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
197         for(int iSys=0;iSys<4;iSys++) {
198                 fServer[iSys]=0;
199                 if (iSys < 3)
200                         fFXSlist[iSys].SetOwner(kTRUE);
201         }
202         fPreprocessorMap.SetOwner(kTRUE);
203
204         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
205                 fFirstUnprocessed[iDet] = kFALSE;
206
207         fMonitoringMutex = new TMutex();
208 }
209
210 //______________________________________________________________________________________________
211 AliShuttle::~AliShuttle()
212 {
213 // destructor
214
215         fPreprocessorMap.DeleteAll();
216         for(int iSys=0;iSys<4;iSys++)
217                 if(fServer[iSys]) {
218                         fServer[iSys]->Close();
219                         delete fServer[iSys];
220                         fServer[iSys] = 0;
221                 }
222
223         if (fStatusEntry){
224                 delete fStatusEntry;
225                 fStatusEntry = 0;
226         }
227         
228         if (fMonitoringMutex) 
229         {
230                 delete fMonitoringMutex;
231                 fMonitoringMutex = 0;
232         }
233 }
234
235 //______________________________________________________________________________________________
236 void AliShuttle::RegisterPreprocessor(AliPreprocessor* preprocessor)
237 {
238         //
239         // Registers new AliPreprocessor.
240         // It uses GetName() for indentificator of the pre processor.
241         // The pre processor is registered it there isn't any other
242         // with the same identificator (GetName()).
243         //
244
245         const char* detName = preprocessor->GetName();
246         if(GetDetPos(detName) < 0)
247                 AliFatal(Form("********** !!!!! Invalid detector name: %s !!!!! **********", detName));
248
249         if (fPreprocessorMap.GetValue(detName)) {
250                 AliWarning(Form("AliPreprocessor %s is already registered!", detName));
251                 return;
252         }
253
254         fPreprocessorMap.Add(new TObjString(detName), preprocessor);
255 }
256 //______________________________________________________________________________________________
257 UInt_t AliShuttle::Store(const AliCDBPath& path, TObject* object,
258                 AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite)
259 {
260   // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for
261   // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored
262   // using this function. Use StoreReferenceData instead!
263   // It calls WriteToCDB function which perform actual storage
264
265         return WriteToCDB(fgkMainCDB, fgkLocalCDB, path, object,
266                                 metaData, validityStart, validityInfinite);
267
268 }
269
270 //______________________________________________________________________________________________
271 UInt_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData)
272 {
273   // Stores a CDB object in the storage for reference data. This objects will not be available during
274   // offline reconstrunction. Use this function for reference data only!
275   // It calls WriteToCDB function which perform actual storage
276
277         return WriteToCDB(fgkMainRefStorage, fgkLocalRefStorage, path, object, metaData);
278
279 }
280
281 //______________________________________________________________________________________________
282 UInt_t AliShuttle::WriteToCDB(const char* mainUri, const char* localUri,
283                         const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
284                         Int_t validityStart, Bool_t validityInfinite)
285 {
286   // write object into the CDB. Parameters are passed by Store and StoreReferenceData functions.
287   // The parameters are:
288   //   1) Uri of the main storage (Grid)
289   //   2) Uri of the backup storage (Local)
290   //   3) the object's path.
291   //   4) the object to be stored
292   //   5) the metaData to be associated with the object
293   //   6) the validity start run number w.r.t. the current run,
294   //      if the data is valid only for this run leave the default 0
295   //   7) specifies if the calibration data is valid for infinity (this means until updated),
296   //      typical for calibration runs, the default is kFALSE
297   //
298   // returns 0 if fail
299   //         1 if stored in main (Grid) storage
300   //         2 if stored in backup (Local) storage
301
302         const char* cdbType = (mainUri == fgkMainCDB) ? "CDB" : "Reference";
303
304         Int_t firstRun = GetCurrentRun() - validityStart;
305         if(firstRun < 0) {
306                 AliError("First valid run happens to be less than 0! Setting it to 0.");
307                 firstRun=0;
308         }
309
310         Int_t lastRun = -1;
311         if(validityInfinite) {
312                 lastRun = AliCDBRunRange::Infinity();
313         } else {
314                 lastRun = GetCurrentRun();
315         }
316
317         AliCDBId id(path, firstRun, lastRun, -1, -1);
318
319         if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){
320                 TObjString runUsed = Form("%d", GetCurrentRun());
321                 metaData->SetProperty("RunUsed(TObjString)",&runUsed);
322         }
323
324         UInt_t result = 0;
325
326         if (!(AliCDBManager::Instance()->GetStorage(mainUri))) {
327                 AliError(Form("WriteToCDB - Cannot activate main %s storage", cdbType));
328         } else {
329                 result = (UInt_t) AliCDBManager::Instance()->GetStorage(mainUri)
330                                         ->Put(object, id, metaData);
331         }
332
333         if(!result) {
334
335                 Log(fCurrentDetector,
336                         Form("WriteToCDB - Problem with main %s storage. Putting <%s> into backup storage",
337                                 cdbType, path.GetPath().Data()));
338
339                 // Set Grid version to current run number, to ease retrieval later
340                 id.SetVersion(GetCurrentRun());
341
342                 result = AliCDBManager::Instance()->GetStorage(localUri)
343                                         ->Put(object, id, metaData);
344
345                 if(result) {
346                         result = 2;
347                         fGridError = kTRUE;
348                 }else{
349                         Log(fCurrentDetector, "WriteToCDB - Can't store data!");
350                 }
351         }
352
353         return result;
354
355 }
356
357 //______________________________________________________________________________________________
358 AliShuttleStatus* AliShuttle::ReadShuttleStatus()
359 {
360 // Reads the AliShuttleStatus from the CDB
361
362         if (fStatusEntry){
363                 delete fStatusEntry;
364                 fStatusEntry = 0;
365         }
366
367         fStatusEntry = AliCDBManager::Instance()->GetStorage(AliShuttle::GetLocalCDB())
368                 ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
369
370         if (!fStatusEntry) return 0;
371         fStatusEntry->SetOwner(1);
372
373         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
374         if (!status) {
375                 AliError("Invalid object stored to CDB!");
376                 return 0;
377         }
378
379         return status;
380 }
381
382 //______________________________________________________________________________________________
383 Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status)
384 {
385 // writes the status for one subdetector
386
387         if (fStatusEntry){
388                 delete fStatusEntry;
389                 fStatusEntry = 0;
390         }
391
392         Int_t run = GetCurrentRun();
393
394         AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
395
396         fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
397         fStatusEntry->SetOwner(1);
398
399         UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
400
401         if (!result) {
402                 AliError(Form("WriteShuttleStatus for %s, run %d failed", fCurrentDetector.Data(), run));
403                 return kFALSE;
404         }
405
406         return kTRUE;
407 }
408
409 //______________________________________________________________________________________________
410 void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount)
411 {
412   // changes the AliShuttleStatus for the given detector and run to the given status
413
414         if (!fStatusEntry){
415                 AliError("UNEXPECTED: fStatusEntry empty");
416                 return;
417         }
418
419         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
420
421         if (!status){
422                 AliError("UNEXPECTED: status could not be read from current CDB entry");
423                 return;
424         }
425
426         TString actionStr = Form("UpdateShuttleStatus - %s: Changing state from %s to %s",
427                                 fCurrentDetector.Data(),
428                                 status->GetStatusName(), 
429                                 status->GetStatusName(newStatus));
430         Log("SHUTTLE", actionStr);
431         SetLastAction(actionStr);
432
433         status->SetStatus(newStatus);
434         if (increaseCount) status->IncreaseCount();
435
436         AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
437 }
438 //______________________________________________________________________________________________
439 Bool_t AliShuttle::ContinueProcessing()
440 {
441 // this function reads the AliShuttleStatus information from CDB and
442 // checks if the processing should be continued
443 // if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
444
445         AliShuttleLogbookEntry::Status entryStatus =
446                 fLogbookEntry->GetDetectorStatus(fCurrentDetector);
447
448         if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
449                 Log("SHUTTLE", Form("ContinueProcessing - %s is %s",
450                                 fCurrentDetector.Data(),
451                                 fLogbookEntry->GetDetectorStatusName(entryStatus)));
452                 return kFALSE;
453         }
454
455         // if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state
456
457         // check if current run is first unprocessed run for current detector
458         if (fConfig->StrictRunOrder(fCurrentDetector) &&
459                 !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
460         {
461                 Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering but this is not the first unprocessed run!"));
462                 return kFALSE;
463         }
464
465         AliShuttleStatus* status = ReadShuttleStatus();
466         if (!status) {
467                 // first time
468                 Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
469                                 fCurrentDetector.Data()));
470                 status = new AliShuttleStatus(AliShuttleStatus::kStarted);
471                 return WriteShuttleStatus(status);
472         }
473
474         // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
475         // If it happens it may mean Logbook updating failed... let's do it now!
476         if (status->GetStatus() == AliShuttleStatus::kDone ||
477             status->GetStatus() == AliShuttleStatus::kFailed){
478                 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
479                                         fCurrentDetector.Data(),
480                                         status->GetStatusName(status->GetStatus())));
481                 UpdateShuttleLogbook(fCurrentDetector.Data(),
482                                         status->GetStatusName(status->GetStatus()));
483                 return kFALSE;
484         }
485
486         if (status->GetStatus() == AliShuttleStatus::kStoreFailed) {
487                 Log("SHUTTLE",
488                         Form("ContinueProcessing - %s: Grid storage of one or more objects failed. Trying again now",
489                                 fCurrentDetector.Data()));
490                 if(TryToStoreAgain()){
491                         Log(fCurrentDetector.Data(), "ContinueProcessing - All objects successfully stored into OCDB");
492                         UpdateShuttleStatus(AliShuttleStatus::kDone);
493                         UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE");
494                 } else {
495                         Log("SHUTTLE",
496                                 Form("ContinueProcessing - %s: Grid storage failed again",
497                                         fCurrentDetector.Data()));
498                 }
499                 return kFALSE;
500         }
501
502         // if we get here, there is a restart
503
504         // abort conditions
505         if (status->GetCount() >= fConfig->GetMaxRetries()) {
506                 Log("SHUTTLE",
507                         Form("ContinueProcessing - %s failed %d times in status %s - Updating Shuttle Logbook",
508                                 fCurrentDetector.Data(),
509                                 status->GetCount(), status->GetStatusName()));
510                 UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
511                 return kFALSE;
512         }
513
514         Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. Aborted before with %s. Retry number %d.",
515                         fCurrentDetector.Data(),
516                         status->GetStatusName(), status->GetCount()));
517
518         UpdateShuttleStatus(AliShuttleStatus::kStarted, kTRUE);
519
520         return kTRUE;
521 }
522
523 //______________________________________________________________________________________________
524 Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
525 {
526         //
527         // Makes data retrieval for all detectors in the configuration.
528         // entry: Shuttle logbook entry, contains run paramenters and status of detectors
529         // (Unprocessed, Inactive, Failed or Done).
530         // Returns kFALSE in case of error occured and kTRUE otherwise
531         //
532
533         if(!entry) return kFALSE;
534
535         fLogbookEntry = entry;
536
537         if(fLogbookEntry->IsDone()){
538                 Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook");
539                 UpdateShuttleLogbook("shuttle_done");
540                 fLogbookEntry = 0;
541                 return kTRUE;
542         }
543
544
545         AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^* \n",
546                                         GetCurrentRun()));
547
548         fLogbookEntry->Print("all");
549
550         // Initialization
551         Bool_t hasError = kFALSE;
552         for(Int_t iSys=0;iSys<3;iSys++) fFXSCalled[iSys]=kFALSE;
553
554         AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
555         if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
556         AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
557         if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
558
559         // Loop on detectors in the configuration
560         TIter iter(fConfig->GetDetectors());
561         TObjString* aDetector = 0;
562
563         while ((aDetector = (TObjString*) iter.Next()))
564         {
565                 fCurrentDetector = aDetector->String();
566
567                 if (!fConfig->HostProcessDetector(fCurrentDetector)) continue;
568
569                 AliPreprocessor* aPreprocessor =
570                         dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
571                 if (!aPreprocessor)
572                 {
573                         Log("SHUTTLE",Form("Process - %s: no preprocessor registered. Skipping",
574                                                         fCurrentDetector.Data()));
575                         continue;
576                 }
577
578                 if (ContinueProcessing() == kFALSE) continue;
579
580                 AliInfo(Form("\n\n \t\t\t****** run %d - %s: START  ******",
581                                                 GetCurrentRun(), aDetector->GetName()));
582
583
584                 Int_t pid = fork();
585
586                 if (pid < 0)
587                 {
588                         Log("SHUTTLE", "ERROR: Forking failed");
589                 }
590                 else if (pid > 0)
591                 {
592                         // parent
593                         AliInfo(Form("In parent process of %d - %s: Starting monitoring",
594                                                         GetCurrentRun(), aDetector->GetName()));
595
596                         Long_t begin = time(0);
597
598                         int status; // to be used with waitpid, on purpose an int (not Int_t)!
599                         while (waitpid(pid, &status, WNOHANG) == 0)
600                         {
601                                 Long_t expiredTime = time(0) - begin;
602
603                                 if (expiredTime > fConfig->GetPPTimeOut())
604                                 {
605                                         Log("SHUTTLE", Form("Process time out. Run time: %d seconds. Killing...",
606                                                                 expiredTime));
607
608                                         kill(pid, 9);
609
610                                         hasError = kTRUE;
611
612                                         gSystem->Sleep(1000);
613                                 }
614                                 else
615                                 {
616                                         if (expiredTime % 60 == 0)
617                                         Log("SHUTTLE", Form("Checked process. Run time: %d seconds.",
618                                                                 expiredTime));
619                                         gSystem->Sleep(1000);
620                                 }
621                         }
622
623                         AliInfo(Form("In parent process of %d - %s: Client has terminated.",
624                                                                 GetCurrentRun(), aDetector->GetName()));
625
626                         if (WIFEXITED(status))
627                         {
628                                 Int_t returnCode = WEXITSTATUS(status);
629
630                                 Log("SHUTTLE", Form("The return code is %d", returnCode));
631
632                                 if (returnCode != 0)
633                                 hasError = kTRUE;
634                         }
635                 }
636                 else if (pid == 0)
637                 {
638                         // client
639                         AliInfo(Form("In client process of %d - %s", GetCurrentRun(), aDetector->GetName()));
640
641                         UInt_t result = ProcessCurrentDetector();
642
643                         Int_t returnCode = 0; // will be set to 1 in case of an error
644
645                         if (!result)
646                         {
647                                 returnCode = 1;
648                                 AliInfo(Form("\n \t\t\t****** run %d - %s: PREPROCESSOR ERROR ****** \n\n",
649                                                         GetCurrentRun(), aDetector->GetName()));
650                         }
651                         else if (result == 2)
652                         {
653                                 AliInfo(Form("\n \t\t\t****** run %d - %s: STORAGE ERROR ****** \n\n",
654                                                         GetCurrentRun(), aDetector->GetName()));
655                         } else
656                         {
657                                 AliInfo(Form("\n \t\t\t****** run %d - %s: DONE ****** \n\n",
658                                                         GetCurrentRun(), aDetector->GetName()));
659                         }
660
661                         if (result > 0)
662                         {
663                                 // Process successful: Update time_processed field in FXS logbooks!
664                                 if (fFXSCalled[kDAQ])
665                                 {
666                                         if (UpdateDAQTable() == kFALSE)
667                                         returnCode = 1;
668                                         fFXSlist[kDAQ].Clear();
669                                 }
670                                 //if(fFXSCalled[kDCS]) {
671                                 //  if (UpdateDCSTable(aDetector->GetName()) == kFALSE)
672                                 //    returnCode = 1;
673                                 //  fFXSlist[kDCS].Clear();
674                                 //}
675                                 //if(fFXSCalled[kHLT]) {
676                                 //  if (UpdateHLTTable(aDetector->GetName()) == kFALSE)
677                                 //    returnCode = 1;
678                                 //      fFXSlist[kHLT].Clear();
679                                 //}
680                         }
681
682                         AliInfo(Form("Client process of %d - %s is exiting now with %d.",
683                                                         GetCurrentRun(), aDetector->GetName(), returnCode));
684
685                         // the client exits here
686                         gSystem->Exit(returnCode);
687
688                         AliError("We should never get here!!!");
689                 }
690         }
691
692         AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^* \n",
693                                                         GetCurrentRun()));
694
695         //check if shuttle is done for this run, if so update logbook
696         TObjArray checkEntryArray;
697         checkEntryArray.SetOwner(1);
698         TString whereClause = Form("where run=%d",GetCurrentRun());
699         if (QueryShuttleLogbook(whereClause.Data(), checkEntryArray)) {
700
701                 AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*>
702                                                         (checkEntryArray.At(0));
703
704                 if (checkEntry)
705                 {
706                         if (checkEntry->IsDone())
707                         {
708                                 Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook");
709                                 UpdateShuttleLogbook("shuttle_done");
710                         }
711                         else
712                         {
713                                 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
714                                 {
715                                         if (checkEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
716                                         {
717                                                 AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
718                                                                 checkEntry->GetRun(), GetDetName(iDet)));
719                                                 fFirstUnprocessed[iDet] = kFALSE;
720                                         }
721                                 }
722                         }
723                 }
724         }
725
726         fLogbookEntry = 0;
727
728         return hasError == kFALSE;
729 }
730
731 //______________________________________________________________________________________________
732 UInt_t AliShuttle::ProcessCurrentDetector()
733 {
734         //
735         // Makes data retrieval just for a specific detector (fCurrentDetector).
736         // Threre should be a configuration for this detector.
737
738         AliInfo(Form("Retrieving values for %s, run %d", fCurrentDetector.Data(), GetCurrentRun()));
739
740         UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
741
742         TMap dcsMap;
743         dcsMap.SetOwner(1);
744
745         Bool_t aDCSError = kFALSE;
746         fGridError = kFALSE;
747
748         // TODO Test only... I've added a flag that allows to
749         // exclude DCS archive DB query
750         if (!fgkProcessDCS)
751         {
752                 AliInfo("Skipping DCS processing!");
753                 aDCSError = kFALSE;
754         } else {
755                 TString host(fConfig->GetDCSHost(fCurrentDetector));
756                 Int_t port = fConfig->GetDCSPort(fCurrentDetector);
757
758                 // Retrieval of Aliases
759                 TObjString* anAlias = 0;
760                 TIter iterAliases(fConfig->GetDCSAliases(fCurrentDetector));
761                 while ((anAlias = (TObjString*) iterAliases.Next()))
762                 {
763                         TObjArray *valueSet = new TObjArray();
764                         valueSet->SetOwner(1);
765
766                         AliInfo("Querying DCS archive DB (Aliases)...");
767                         aDCSError = (GetValueSet(host, port, anAlias->String(), valueSet, kAlias) == 0);
768
769                         if(!aDCSError)
770                         {
771                                 dcsMap.Add(anAlias->Clone(), valueSet);
772                         } else {
773                                 Log(fCurrentDetector,
774                                         Form("ProcessCurrentDetector - Error while retrieving alias %s",
775                                                 anAlias->GetName()));
776                                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
777                                 dcsMap.DeleteAll();
778                                 return 0;
779                         }
780                 }
781
782                 // Retrieval of Data Points
783                 TObjString* aDP = 0;
784                 TIter iterDP(fConfig->GetDCSDataPoints(fCurrentDetector));
785                 while ((aDP = (TObjString*) iterDP.Next()))
786                 {
787                         TObjArray *valueSet = new TObjArray();
788                         valueSet->SetOwner(1);
789                         AliInfo("Querying DCS archive DB (Data Points)...");
790                         aDCSError = (GetValueSet(host, port, aDP->String(), valueSet, kDP) == 0);
791
792                         if(!aDCSError)
793                         {
794                                 dcsMap.Add(aDP->Clone(), valueSet);
795                         } else {
796                                 Log(fCurrentDetector,
797                                         Form("ProcessCurrentDetector - Error while retrieving data point %s",
798                                                 aDP->GetName()));
799                                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
800                                 dcsMap.DeleteAll();
801                                 return 0;
802                         }
803                 }
804         }
805
806         // DCS Archive DB processing successful. Call Preprocessor!
807         UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
808
809         AliPreprocessor* aPreprocessor =
810                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
811
812         aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
813         UInt_t aPPResult = aPreprocessor->Process(&dcsMap);
814
815         UInt_t returnValue = 0;
816         if (aPPResult == 0) { // Preprocessor error
817                 UpdateShuttleStatus(AliShuttleStatus::kPPError);
818                 returnValue = 0;
819         } else if (fGridError == kFALSE) { // process and Grid storage ok!
820                 UpdateShuttleStatus(AliShuttleStatus::kDone);
821                 UpdateShuttleLogbook(fCurrentDetector, "DONE");
822                 Log(fCurrentDetector.Data(),
823                         "ProcessCurrentDetector - Preprocessor and Grid storage ended successfully");
824                 returnValue = 1;
825         } else { // Grid storage error (process ok, but object put in local storage)
826                 UpdateShuttleStatus(AliShuttleStatus::kStoreFailed);
827                 returnValue = 2;
828         }
829
830         dcsMap.DeleteAll();
831
832         return returnValue;
833 }
834
835 //______________________________________________________________________________________________
836 Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
837                 TObjArray& entries)
838 {
839 // Query DAQ's Shuttle logbook and fills detector status object.
840 // Call QueryRunParameters to query DAQ logbook for run parameters.
841
842         // check connection, in case connect
843         if(!Connect(3)) return kFALSE;
844
845         TString sqlQuery;
846         sqlQuery = Form("select * from logbook_shuttle %s order by run", whereClause);
847
848         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
849         if (!aResult) {
850                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
851                 return kFALSE;
852         }
853
854         if(aResult->GetRowCount() == 0) {
855                 if(sqlQuery.Contains("where shuttle_done=0")){
856                         Log("SHUTTLE", "QueryShuttleLogbook - All runs in Shuttle Logbook are already DONE");
857                         delete aResult;
858                         return kTRUE;
859                 } else {
860                         AliError("No entries in Shuttle Logbook match request");
861                         delete aResult;
862                         return kFALSE;
863                 }
864         }
865
866         // TODO Check field count!
867         const UInt_t nCols = 24;
868         if (aResult->GetFieldCount() != (Int_t) nCols) {
869                 AliError("Invalid SQL result field number!");
870                 delete aResult;
871                 return kFALSE;
872         }
873
874         entries.SetOwner(1);
875
876         TSQLRow* aRow;
877         while ((aRow = aResult->Next())) {
878                 TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
879                 Int_t run = runString.Atoi();
880
881                 AliShuttleLogbookEntry *entry = QueryRunParameters(run);
882                 if (!entry)
883                         continue;
884
885                 // loop on detectors
886                 for(UInt_t ii = 0; ii < nCols; ii++)
887                         entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii));
888
889                 entries.AddLast(entry);
890                 delete aRow;
891         }
892
893         if(sqlQuery.Contains("where shuttle_done=0"))
894                 Log("SHUTTLE", Form("QueryShuttleLogbook - Found %d unprocessed runs in Shuttle Logbook",
895                                                         entries.GetEntriesFast()));
896         delete aResult;
897         return kTRUE;
898 }
899
900 //______________________________________________________________________________________________
901 AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
902 {
903         //
904         // Retrieve run parameters written in the DAQ logbook and sets them into AliShuttleLogbookEntry object
905         //
906
907         // check connection, in case connect
908         if (!Connect(3))
909                 return 0;
910
911         TString sqlQuery;
912         sqlQuery.Form("select * from %s where run=%d", fConfig->GetDAQlbTable(), run);
913
914         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
915         if (!aResult) {
916                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
917                 return 0;
918         }
919
920         if (aResult->GetRowCount() == 0) {
921                 Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run));
922                 delete aResult;
923                 return 0;
924         }
925
926         if (aResult->GetRowCount() > 1) {
927                 AliError(Form("More than one entry in DAQ Logbook for run %d. Skipping", run));
928                 delete aResult;
929                 return 0;
930         }
931
932         TSQLRow* aRow = aResult->Next();
933         if (!aRow)
934         {
935                 AliError(Form("Could not retrieve row for run %d. Skipping", run));
936                 delete aResult;
937                 return 0;
938         }
939
940         AliShuttleLogbookEntry* entry = new AliShuttleLogbookEntry(run);
941
942         for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
943                 entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
944
945         UInt_t startTime = entry->GetStartTime();
946         UInt_t endTime = entry->GetEndTime();
947
948         if (!startTime || !endTime || startTime > endTime) {
949                 Log("SHUTTLE",
950                         Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d",
951                                 run, startTime, endTime));
952                 delete entry;
953                 delete aRow;
954                 delete aResult;
955                 return 0;
956         }
957
958         delete aRow;
959         delete aResult;
960
961         return entry;
962 }
963
964 //______________________________________________________________________________________________
965 Bool_t AliShuttle::TryToStoreAgain()
966 {
967   // Called in case the detector failed to store the object in Grid OCDB
968   // It tries to store the object again, if it does not find more recent and overlapping objects
969   // Calls underlying TryToStoreAgain(const char*) function twice, for OCDB and Reference storage.
970
971         AliInfo("Trying to store OCDB data again...");
972         Bool_t resultCDB = TryToStoreAgain(fgkMainCDB);
973
974         AliInfo("Trying to store reference data again...");
975         Bool_t resultRef = TryToStoreAgain(fgkMainRefStorage);
976
977         return resultCDB && resultRef;
978 }
979
980 //______________________________________________________________________________________________
981 Bool_t AliShuttle::TryToStoreAgain(TString& gridURI)
982 {
983   // Called by TryToStoreAgain(), performs actual storage retry
984
985         TObjArray* gridIds=0;
986
987         Bool_t result = kTRUE;
988
989         const char* type = 0;
990         TString backupURI;
991         if(gridURI == fgkMainCDB) {
992                 type = "OCDB";
993                 backupURI = fgkLocalCDB;
994         } else if(gridURI == fgkMainRefStorage) {
995                 type = "reference";
996                 backupURI = fgkLocalRefStorage;
997         } else {
998                 AliError(Form("Invalid storage URI: %s", gridURI.Data()));
999                 return kFALSE;
1000         }
1001
1002         AliCDBManager* man = AliCDBManager::Instance();
1003
1004         AliCDBStorage *gridSto = man->GetStorage(gridURI);
1005         if(!gridSto) {
1006                 Log(fCurrentDetector.Data(),
1007                         Form("TryToStoreAgain - cannot activate main %s storage", type));
1008                 return kFALSE;
1009         }
1010
1011         gridIds = gridSto->GetQueryCDBList();
1012
1013         // get objects previously stored in local CDB
1014         AliCDBStorage *backupSto = man->GetStorage(backupURI);
1015         AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*");
1016         // Local objects were stored with current run as Grid version!
1017         TList* localEntries = backupSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun());
1018         localEntries->SetOwner(1);
1019
1020         // loop on local stored objects
1021         TIter localIter(localEntries);
1022         AliCDBEntry *aLocEntry = 0;
1023         while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){
1024                 aLocEntry->SetOwner(1);
1025                 AliCDBId aLocId = aLocEntry->GetId();
1026                 aLocEntry->SetVersion(-1);
1027                 aLocEntry->SetSubVersion(-1);
1028
1029                 // loop on Grid valid Id's
1030                 Bool_t store = kTRUE;
1031                 TIter gridIter(gridIds);
1032                 AliCDBId* aGridId = 0;
1033                 while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
1034                         // If local object is valid up to infinity we store it only if it is
1035                         // the first unprocessed run!
1036                         if (aLocId.GetLastRun() == AliCDBRunRange::Infinity())
1037                         {
1038                                 if (!fFirstUnprocessed[GetDetPos(fCurrentDetector)])
1039                                 {
1040                                         Log(fCurrentDetector.Data(),
1041                                                 ("TryToStoreAgain - This object has validity infinite but "
1042                                                  "there are previous unprocessed runs!"));
1043                                         continue;
1044                                 } else {
1045                                         break;
1046                                 }
1047                         }
1048                         if(aGridId->GetPath() != aLocId.GetPath()) continue;
1049                         // skip all objects valid up to infinity
1050                         if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
1051                         // if we get here, it means there's already some more recent object stored on Grid!
1052                         store = kFALSE;
1053                         break;
1054                 }
1055
1056                 if(!store){
1057                         Log(fCurrentDetector.Data(),
1058                                 Form("TryToStoreAgain - A more recent object already exists in %s storage: <%s>",
1059                                         type, aGridId->ToString().Data()));
1060                         // removing local filename...
1061                         // TODO maybe it's better not to remove it, it was not copied to the Grid!
1062                         TString filename;
1063                         backupSto->IdToFilename(aLocId, filename);
1064                         AliInfo(Form("Removing local file %s", filename.Data()));
1065                         gSystem->Exec(Form("rm %s",filename.Data()));
1066                         continue;
1067                 }
1068
1069                 // If we get here, the file can be stored!
1070                 Bool_t storeOk = gridSto->Put(aLocEntry);
1071                 if(storeOk){
1072                         Log(fCurrentDetector.Data(),
1073                                 Form("TryToStoreAgain - Object <%s> successfully put into %s storage",
1074                                         aLocId.ToString().Data(), type));
1075
1076                         // removing local filename...
1077                         TString filename;
1078                         backupSto->IdToFilename(aLocId, filename);
1079                         AliInfo(Form("Removing local file %s", filename.Data()));
1080                         gSystem->Exec(Form("rm %s", filename.Data()));
1081                         continue;
1082                 } else  {
1083                         Log(fCurrentDetector.Data(),
1084                                 Form("TryToStoreAgain - Grid %s storage of object <%s> failed again",
1085                                         type, aLocId.ToString().Data()));
1086                         result = kFALSE;
1087                 }
1088         }
1089         localEntries->Clear();
1090
1091         return result;
1092 }
1093
1094 //______________________________________________________________________________________________
1095 Bool_t AliShuttle::GetValueSet(const char* host, Int_t port, const char* entry,
1096                                 TObjArray* valueSet, DCSType type)
1097 {
1098 // Retrieve all "entry" data points from the DCS server
1099 // host, port: TSocket connection parameters
1100 // entry: name of the alias or data point
1101 // valueSet: array of retrieved AliDCSValue's
1102 // type: kAlias or kDP
1103
1104         AliDCSClient client(host, port, fTimeout, fRetries);
1105         if (!client.IsConnected())
1106         {
1107                 return kFALSE;
1108         }
1109
1110         Int_t result=0;
1111
1112         if (type == kAlias)
1113         {
1114                 result = client.GetAliasValues(entry,
1115                         GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1116         } else
1117         if (type == kDP)
1118         {
1119                 result = client.GetDPValues(entry,
1120                         GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1121         }
1122
1123         if (result < 0)
1124         {
1125                 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get '%s'! Reason: %s",
1126                         entry, AliDCSClient::GetErrorString(result)));
1127
1128                 if (result == AliDCSClient::fgkServerError)
1129                 {
1130                         Log(fCurrentDetector.Data(), Form("GetValueSet - Server error: %s",
1131                                 client.GetServerError().Data()));
1132                 }
1133
1134                 return kFALSE;
1135         }
1136
1137         return kTRUE;
1138 }
1139
1140 //______________________________________________________________________________________________
1141 const char* AliShuttle::GetFile(Int_t system, const char* detector,
1142                 const char* id, const char* source)
1143 {
1144 // Get calibration file from file exchange servers
1145 // calls specific getter according to system index (kDAQ, kDCS, kHLT)
1146
1147         switch(system){
1148                 case kDAQ:
1149                         return GetDAQFileName(detector, id, source);
1150                         break;
1151                 case kDCS:
1152                         return GetDCSFileName(detector, id, source);
1153                         break;
1154                 case kHLT:
1155                         return GetHLTFileName(detector, id, source);
1156                         break;
1157                 default:
1158                         AliError(Form("No valid system index: %d",system));
1159         }
1160
1161         return 0;
1162 }
1163
1164 //______________________________________________________________________________________________
1165 TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
1166 {
1167 // Get sources producing the condition file Id from file exchange servers
1168 // calls specific getter according to system index (kDAQ, kDCS, kHLT)
1169
1170         switch(system){
1171                 case kDAQ:
1172                         return GetDAQFileSources(detector, id);
1173                         break;
1174                 case kDCS:
1175                         return GetDCSFileSources(detector, id);
1176                         break;
1177                 case kHLT:
1178                         return GetHLTFileSources(detector, id);
1179                         break;
1180                 default:
1181                         AliError(Form("No valid system index: %d",system));
1182         }
1183
1184         return NULL;
1185 }
1186
1187 //______________________________________________________________________________________________
1188 Bool_t AliShuttle::Connect(Int_t system)
1189 {
1190 // Connect to MySQL Server of the system's FXS MySQL databases
1191 // DAQ Logbook, Shuttle Logbook and DAQ FXS db are on the same host
1192
1193         // check connection: if already connected return
1194         if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
1195
1196         TString dbHost, dbUser, dbPass, dbName;
1197
1198         if (system < 3) // FXS db servers
1199         {
1200                 dbHost = Form("mysql://%s", fConfig->GetFXSdbHost(system));
1201                 dbUser = fConfig->GetFXSdbUser(system);
1202                 dbPass = fConfig->GetFXSdbPass(system);
1203                 dbName =   fConfig->GetFXSdbName(system);
1204         } else { // Run & Shuttle logbook servers
1205         // TODO Will the Shuttle logbook server be the same as the Run logbook server ???
1206                 dbHost = Form("mysql://%s", fConfig->GetDAQlbHost());
1207                 dbUser = fConfig->GetDAQlbUser();
1208                 dbPass = fConfig->GetDAQlbPass();
1209                 dbName =   fConfig->GetDAQlbDB();
1210         }
1211
1212         fServer[system] = TSQLServer::Connect(dbHost.Data(), dbUser.Data(), dbPass.Data());
1213         if (!fServer[system] || !fServer[system]->IsConnected()) {
1214                 if(system < 3)
1215                 {
1216                 AliError(Form("Can't establish connection to FXS database for %s",
1217                                         AliShuttleInterface::GetSystemName(system)));
1218                 } else {
1219                 AliError("Can't establish connection to Run logbook.");
1220                 }
1221                 if(fServer[system]) delete fServer[system];
1222                 return kFALSE;
1223         }
1224
1225         // Get tables
1226         // TODO in the configuration should the table name be there too?
1227         TSQLResult* aResult=0;
1228         switch(system){
1229                 case kDAQ:
1230                         aResult = fServer[kDAQ]->GetTables(dbName.Data());
1231                         break;
1232                 case kDCS:
1233                         //aResult = fServer[kDCS]->GetTables(dbName.Data());
1234                         break;
1235                 case kHLT:
1236                         //aResult = fServer[kHLT]->GetTables(dbName.Data());
1237                         break;
1238                 default:
1239                         aResult = fServer[3]->GetTables(dbName.Data());
1240                         break;
1241         }
1242
1243         delete aResult;
1244         return kTRUE;
1245 }
1246
1247 //______________________________________________________________________________________________
1248 const char* AliShuttle::GetDAQFileName(const char* detector, const char* id, const char* source)
1249 {
1250 // Retrieves a file from the DAQ FXS.
1251 // First queris the DAQ FXS database for the DAQ file name, using the run, detector, id and source info
1252 // then calls RetrieveDAQFile(DAQfilename) for actual copy to local disk
1253 // run: current run being processed (given by Logbook entry fLogbookEntry)
1254 // detector: the Preprocessor name
1255 // id: provided as a parameter by the Preprocessor
1256 // source: provided by the Preprocessor through GetFileSources function
1257
1258         // check connection, in case connect
1259         if (!Connect(kDAQ))
1260         {
1261                 Log(detector, "GetDAQFileName - Couldn't connect to DAQ FXS database");
1262                 return 0;
1263         }
1264
1265         // Query preparation
1266         TString sqlQueryStart = Form("select filePath from %s where", fConfig->GetFXSdbTable(kDAQ));
1267         TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\"",
1268                                 GetCurrentRun(), detector, id, source);
1269         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1270
1271         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1272
1273         // Query execution
1274         TSQLResult* aResult = 0;
1275         aResult = dynamic_cast<TSQLResult*> (fServer[kDAQ]->Query(sqlQuery));
1276         if (!aResult) {
1277                 Log(detector, Form("GetDAQFileName - Can't execute SQL query for: id = %s, source = %s",
1278                                 id, source));
1279                 return 0;
1280         }
1281
1282         if(aResult->GetRowCount() == 0)
1283         {
1284                 Log(detector,
1285                         Form("GetDAQFileName - No entry in FXS table for: id = %s, source = %s",
1286                                 id, source));
1287                 delete aResult;
1288                 return 0;
1289         }
1290
1291         if (aResult->GetRowCount() > 1) {
1292                 Log(detector,
1293                         Form("GetDAQFileName - More than one entry in FXS table for: id = %s, source = %s",
1294                                 id, source));
1295                 delete aResult;
1296                 return 0;
1297         }
1298
1299         TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
1300
1301         if (!aRow){
1302                 Log(detector, Form("GetDAQFileName - Empty set result from query: id = %s, source = %s",
1303                                 id, source));
1304                 delete aResult;
1305                 return 0;
1306         }
1307
1308         TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
1309
1310         delete aResult;
1311         delete aRow;
1312
1313         AliDebug(2, Form("filePath = %s",filePath.Data()));
1314
1315         // retrieved file is renamed to make it unique
1316         TString localFileName = Form("%s_%d_%s_%s.shuttle",
1317                                         detector, GetCurrentRun(), id, source);
1318
1319         // file retrieval from DAQ FXS
1320         Bool_t result = RetrieveDAQFile(filePath.Data(), localFileName.Data());
1321         if(!result) {
1322                 Log(detector, Form("GetDAQFileName - Copy of file %s from DAQ FXS failed", filePath.Data()));
1323                 return 0;
1324         } else {
1325                 AliInfo(Form("File %s copied from DAQ FXS into %s/%s",
1326                         filePath.Data(), fgkShuttleTempDir, localFileName.Data()));
1327         }
1328
1329
1330         fFXSCalled[kDAQ]=kTRUE;
1331         TObjString *fileParams = new TObjString(Form("%s_!?!_%s", id, source));
1332         fFXSlist[kDAQ].Add(fileParams);
1333
1334         return localFileName.Data();
1335
1336 }
1337
1338 //______________________________________________________________________________________________
1339 Bool_t AliShuttle::RetrieveDAQFile(const char* daqFileName, const char* localFileName)
1340 {
1341
1342         // check temp directory: trying to cd to temp; if it does not exist, create it
1343         AliDebug(2, Form("Copy file %s from DAQ FXS into folder %s and rename it as %s",
1344                         daqFileName,fgkShuttleTempDir, localFileName));
1345
1346         void* dir = gSystem->OpenDirectory(fgkShuttleTempDir);
1347         if (dir == NULL) {
1348                 if (gSystem->mkdir(fgkShuttleTempDir, kTRUE)) {
1349                         AliError(Form("Can't open directory <%s>", fgkShuttleTempDir));
1350                         return kFALSE;
1351                 }
1352
1353         } else {
1354                 gSystem->FreeDirectory(dir);
1355         }
1356
1357         TString baseDAQFXSFolder = "DAQ";
1358         TString command = Form("scp %s@%s:%s/%s %s/%s",
1359                 fConfig->GetFXSUser(kDAQ),
1360                 fConfig->GetFXSHost(kDAQ),
1361                 baseDAQFXSFolder.Data(),
1362                 daqFileName,
1363                 fgkShuttleTempDir,
1364                 localFileName);
1365
1366         AliDebug(2, Form("%s",command.Data()));
1367
1368         UInt_t nRetries = 0;
1369         UInt_t maxRetries = 3;
1370
1371         // copy!! if successful TSystem::Exec returns 0
1372         while(nRetries++ < maxRetries) {
1373                 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
1374                 if(gSystem->Exec(command.Data()) == 0) return kTRUE;
1375         }
1376
1377         return kFALSE;
1378
1379 }
1380
1381 //______________________________________________________________________________________________
1382 TList* AliShuttle::GetDAQFileSources(const char* detector, const char* id)
1383 {
1384 // Retrieves a file from the DCS FXS.
1385
1386         // check connection, in case connect
1387         if(!Connect(kDAQ)){
1388                 Log(detector, "GetDAQFileSources - Couldn't connect to DAQ FXS database");
1389                 return 0;
1390         }
1391
1392         // Query preparation
1393         TString sqlQueryStart = Form("select DAQsource from %s where", fConfig->GetFXSdbTable(kDAQ));
1394         TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1395                                 GetCurrentRun(), detector, id);
1396         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1397
1398         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1399
1400         // Query execution
1401         TSQLResult* aResult;
1402         aResult = fServer[kDAQ]->Query(sqlQuery);
1403         if (!aResult) {
1404                 Log(detector, Form("GetDAQFileSources - Can't execute SQL query for id: %s", id));
1405                 return 0;
1406         }
1407
1408         if (aResult->GetRowCount() == 0) {
1409                 Log(detector,
1410                         Form("GetDAQFileSources - No entry in FXS table for id: %s", id));
1411                 delete aResult;
1412                 return 0;
1413         }
1414
1415         TSQLRow* aRow;
1416         TList *list = new TList();
1417         list->SetOwner(1);
1418
1419         while((aRow = aResult->Next())){
1420
1421                 TString daqSource(aRow->GetField(0), aRow->GetFieldLength(0));
1422                 AliDebug(2, Form("daqSource = %s", daqSource.Data()));
1423                 list->Add(new TObjString(daqSource));
1424                 delete aRow;
1425         }
1426         delete aResult;
1427
1428         return list;
1429
1430 }
1431
1432 //______________________________________________________________________________________________
1433 const char* AliShuttle::GetDCSFileName(const char* /*detector*/, const char* /*id*/, const char* /*source*/){
1434 // Retrieves a file from the DCS FXS.
1435
1436 return "You're in DCS";
1437
1438 }
1439
1440 //______________________________________________________________________________________________
1441 TList* AliShuttle::GetDCSFileSources(const char* /*detector*/, const char* /*id*/){
1442 // Retrieves file sources from the DCS FXS.
1443
1444 return NULL;
1445
1446 }
1447
1448 //______________________________________________________________________________________________
1449 const char* AliShuttle::GetHLTFileName(const char* /*detector*/, const char* /*id*/, const char* /*source*/){
1450 // Retrieves a file from the HLT FXS.
1451
1452 return "You're in HLT";
1453
1454 }
1455
1456 //______________________________________________________________________________________________
1457 TList* AliShuttle::GetHLTFileSources(const char* /*detector*/, const char* /*id*/){
1458 // Retrieves file sources from the HLT FXS.
1459
1460 return NULL;
1461
1462 }
1463
1464 //______________________________________________________________________________________________
1465 Bool_t AliShuttle::UpdateDAQTable()
1466 {
1467 // Update DAQ table filling time_processed field in all rows corresponding to current run and detector
1468
1469         // check connection, in case connect
1470         if(!Connect(kDAQ)){
1471                 Log(fCurrentDetector, "UpdateDAQTable - Couldn't connect to DAQ FXS database");
1472                 return kFALSE;
1473         }
1474
1475         TTimeStamp now; // now
1476
1477         // Loop on FXS list entries
1478         TIter iter(&fFXSlist[kDAQ]);
1479         TObjString *aFXSentry=0;
1480         while((aFXSentry = dynamic_cast<TObjString*> (iter.Next()))){
1481                 TString aFXSentrystr = aFXSentry->String();
1482                 TObjArray *aFXSarray = aFXSentrystr.Tokenize("_!?!_");
1483                 if(!aFXSarray || aFXSarray->GetEntries() != 2 ) {
1484                         Log(fCurrentDetector, Form("UpdateDAQTable - error updating FXS entry. Check string: <%s>",
1485                                 aFXSentrystr.Data()));
1486                         if(aFXSarray) delete aFXSarray;
1487                         return kFALSE;
1488                 }
1489                 const char* fileId = ((TObjString*) aFXSarray->At(0))->GetName();
1490                 const char* daqSource = ((TObjString*) aFXSarray->At(1))->GetName();
1491                 TString whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
1492                         GetCurrentRun(), fCurrentDetector.Data(), fileId, daqSource);
1493
1494                 delete aFXSarray;
1495
1496                 TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(kDAQ),
1497                                                         now.GetSec(), whereClause.Data());
1498
1499                 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1500
1501                 // Query execution
1502                 TSQLResult* aResult;
1503                 aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
1504                 if (!aResult) {
1505                         Log(fCurrentDetector, Form("UpdateDAQTable - Can't execute SQL query <%s>", sqlQuery.Data()));
1506                         return kFALSE;
1507                 }
1508                 delete aResult;
1509         }
1510
1511         return kTRUE;
1512 }
1513
1514
1515 //______________________________________________________________________________________________
1516 Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
1517 {
1518 // Update Shuttle logbook filling detector or shuttle_done column
1519 // ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
1520
1521         // check connection, in case connect
1522         if(!Connect(3)){
1523                 Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook.");
1524                 return kFALSE;
1525         }
1526
1527         TString detName(detector);
1528         TString setClause;
1529         if(detName == "shuttle_done") {
1530                 setClause = "set shuttle_done=1";
1531         } else {
1532                 TString statusStr(status);
1533                 if(statusStr.Contains("done", TString::kIgnoreCase) ||
1534                    statusStr.Contains("failed", TString::kIgnoreCase)){
1535                         setClause = Form("set %s=\"%s\"", detector, status);
1536                 } else {
1537                         Log("SHUTTLE",
1538                                 Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s",
1539                                         status, detector));
1540                         return kFALSE;
1541                 }
1542         }
1543
1544         TString whereClause = Form("where run=%d", GetCurrentRun());
1545
1546         TString sqlQuery = Form("update logbook_shuttle %s %s",
1547                                         setClause.Data(), whereClause.Data());
1548
1549         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1550
1551         // Query execution
1552         TSQLResult* aResult;
1553         aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
1554         if (!aResult) {
1555                 Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data()));
1556                 return kFALSE;
1557         }
1558         delete aResult;
1559
1560         return kTRUE;
1561 }
1562
1563 //______________________________________________________________________________________________
1564 Int_t AliShuttle::GetCurrentRun() const
1565 {
1566 // Get current run from logbook entry
1567
1568         return fLogbookEntry ? fLogbookEntry->GetRun() : -1;
1569 }
1570
1571 //______________________________________________________________________________________________
1572 UInt_t AliShuttle::GetCurrentStartTime() const
1573 {
1574 // get current start time
1575
1576         return fLogbookEntry ? fLogbookEntry->GetStartTime() : 0;
1577 }
1578
1579 //______________________________________________________________________________________________
1580 UInt_t AliShuttle::GetCurrentEndTime() const
1581 {
1582 // get current end time from logbook entry
1583
1584         return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
1585 }
1586
1587 //______________________________________________________________________________________________
1588 void AliShuttle::Log(const char* detector, const char* message)
1589 {
1590 // Fill log string with a message
1591
1592         void* dir = gSystem->OpenDirectory(fgkShuttleLogDir);
1593         if (dir == NULL) {
1594                 if (gSystem->mkdir(fgkShuttleLogDir, kTRUE)) {
1595                         AliError(Form("Can't open directory <%s>", fgkShuttleTempDir));
1596                         return;
1597                 }
1598
1599         } else {
1600                 gSystem->FreeDirectory(dir);
1601         }
1602
1603         TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
1604         if(GetCurrentRun()>=0 ) toLog += Form("run %d - ", GetCurrentRun());
1605         toLog += Form("%s", message);
1606
1607         AliInfo(toLog.Data());
1608
1609         TString fileName;
1610         fileName.Form("%s/%s.log", fgkShuttleLogDir, detector);
1611         gSystem->ExpandPathName(fileName);
1612
1613         ofstream logFile;
1614         logFile.open(fileName, ofstream::out | ofstream::app);
1615
1616         if (!logFile.is_open()) {
1617                 AliError(Form("Could not open file %s", fileName.Data()));
1618                 return;
1619         }
1620
1621         logFile << toLog.Data() << "\n";
1622
1623         logFile.close();
1624 }
1625
1626 //______________________________________________________________________________________________
1627 Bool_t AliShuttle::Collect(Int_t run)
1628 {
1629 //
1630 // Collects conditions data for all UNPROCESSED run written to DAQ LogBook in case of run = -1 (default)
1631 // If a dedicated run is given this run is processed
1632 //
1633 // In operational mode, this is the Shuttle function triggered by the EOR signal.
1634 //
1635
1636         if (run == -1)
1637                 Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs");
1638         else
1639                 Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run));
1640
1641         SetLastAction("Starting");
1642
1643         TString whereClause("where shuttle_done=0");
1644         if (run != -1)
1645                 whereClause += Form(" and run=%d", run);
1646
1647         TObjArray shuttleLogbookEntries;
1648         if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries))
1649         {
1650                 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
1651                 return kFALSE;
1652         }
1653
1654         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1655                 fFirstUnprocessed[iDet] = kTRUE;
1656
1657         if (run != 1)
1658         {
1659                 // query Shuttle logbook for earlier runs, check if some detectors are unprocessed,
1660                 // flag them into fFirstUnprocessed array
1661                 TString whereClause(Form("where shuttle_done=0 and run < %d", run));
1662                 TObjArray tmpLogbookEntries;
1663                 if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries))
1664                 {
1665                         Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
1666                         return kFALSE;
1667                 }
1668
1669                 TIter iter(&tmpLogbookEntries);
1670                 AliShuttleLogbookEntry* anEntry = 0;
1671                 while ((anEntry = dynamic_cast<AliShuttleLogbookEntry*> (iter.Next())))
1672                 {
1673                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1674                         {
1675                                 if (anEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
1676                                 {
1677                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
1678                                                         anEntry->GetRun(), GetDetName(iDet)));
1679                                         fFirstUnprocessed[iDet] = kFALSE;
1680                                 }
1681                         }
1682
1683                 }
1684
1685         }
1686
1687         if (!RetrieveConditionsData(shuttleLogbookEntries))
1688         {
1689                 Log("SHUTTLE", "Collect - Process of at least one run failed");
1690                 return kFALSE;
1691         }
1692
1693         return kTRUE;
1694 }
1695
1696 //______________________________________________________________________________________________
1697 Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries)
1698 {
1699 // Retrieve conditions data for all runs that aren't processed yet
1700
1701         Bool_t hasError = kFALSE;
1702
1703         TIter iter(&dateEntries);
1704         AliShuttleLogbookEntry* anEntry;
1705
1706         while ((anEntry = (AliShuttleLogbookEntry*) iter.Next())){
1707                 if (!Process(anEntry)){
1708                         hasError = kTRUE;
1709                 }
1710         }
1711
1712         return hasError == kFALSE;
1713 }
1714
1715 //______________________________________________________________________________________________
1716 ULong_t AliShuttle::GetTimeOfLastAction() const
1717 {
1718         ULong_t tmp;
1719         
1720         fMonitoringMutex->Lock();
1721
1722         tmp = fLastActionTime;
1723         
1724         fMonitoringMutex->UnLock();
1725         
1726         return tmp;
1727 }
1728
1729 //______________________________________________________________________________________________
1730 const TString AliShuttle::GetLastAction() const
1731 {
1732         // returns a string description of the last action
1733
1734         TString tmp;
1735         
1736         fMonitoringMutex->Lock();
1737         
1738         tmp = fLastAction;
1739         
1740         fMonitoringMutex->UnLock();
1741
1742         return tmp;     
1743 }
1744
1745 //______________________________________________________________________________________________
1746 void AliShuttle::SetLastAction(const char* action)
1747 {
1748         // updates the monitoring variables
1749         
1750         fMonitoringMutex->Lock();
1751         
1752         fLastAction = action;
1753         fLastActionTime = time(0);
1754         
1755         fMonitoringMutex->UnLock();
1756 }
1757
1758 //______________________________________________________________________________________________
1759 const char* AliShuttle::GetRunParameter(const char* param)
1760 {
1761 // returns run parameter read from DAQ logbook
1762
1763         if(!fLogbookEntry) {
1764                 AliError("No logbook entry!");
1765                 return 0;
1766         }
1767
1768         return fLogbookEntry->GetRunParameter(param);
1769 }