]> git.uio.no Git - u/mrichter/AliRoot.git/blob - SHUTTLE/AliShuttle.cxx
adding log messages in getfile... functions
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttle.cxx
1 /**************************************************************************
2  * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
3  *                                                                        *
4  * Author: The ALICE Off-line Project.                                    *
5  * Contributors are mentioned in the code where appropriate.              *
6  *                                                                        *
7  * Permission to use, copy, modify and distribute this software and its   *
8  * documentation strictly for non-commercial purposes is hereby granted   *
9  * without fee, provided that the above copyright notice appears in all   *
10  * copies and that both the copyright notice and this permission notice   *
11  * appear in the supporting documentation. The authors make no claims     *
12  * about the suitability of this software for any purpose. It is          *
13  * provided "as is" without express or implied warranty.                  *
14  **************************************************************************/
15
16 /*
17 $Log$
18 Revision 1.51  2007/07/03 17:24:52  acolla
19 root moved to v5-16-00. TFileMerger->Cp moved to TFile::Cp.
20
21 Revision 1.50  2007/07/02 17:19:32  acolla
22 preprocessor is run in a temp directory that is removed when process is finished.
23
24 Revision 1.49  2007/06/29 10:45:06  acolla
25 Number of columns in MySql Shuttle logbook increased by one (HLT added)
26
27 Revision 1.48  2007/06/21 13:06:19  acolla
28 GetFileSources returns dummy list with 1 source if system=DCS (better than
29 returning error as it was)
30
31 Revision 1.47  2007/06/19 17:28:56  acolla
32 HLT updated; missing map bug removed.
33
34 Revision 1.46  2007/06/09 13:01:09  jgrosseo
35 Switching to retrieval of several DCS DPs at a time (multiDPrequest)
36
37 Revision 1.45  2007/05/30 06:35:20  jgrosseo
38 Adding functionality to the Shuttle/TestShuttle:
39 o) Function to retrieve list of sources from a given system (GetFileSources with id=0)
40 o) Function to retrieve list of IDs for a given source      (GetFileIDs)
41 These functions are needed for dealing with the tag files that are saved for the GRP preprocessor
42 Example code has been added to the TestProcessor in TestShuttle
43
44 Revision 1.44  2007/05/11 16:09:32  acolla
45 Reference files for ITS, MUON and PHOS are now stored in OfflineDetName/OnlineDetName/run_...
46 example: ITS/SPD/100_filename.root
47
48 Revision 1.43  2007/05/10 09:59:51  acolla
49 Various bug fixes in StoreRefFilesToGrid; Cleaning of reference storage before processing detector (CleanReferenceStorage)
50
51 Revision 1.42  2007/05/03 08:01:39  jgrosseo
52 typo in last commit :-(
53
54 Revision 1.41  2007/05/03 08:00:48  jgrosseo
55 fixing log message when pp want to skip dcs value retrieval
56
57 Revision 1.40  2007/04/27 07:06:48  jgrosseo
58 GetFileSources returns empty list in case of no files, but successful query
59 No mails sent in testmode
60
61 Revision 1.39  2007/04/17 12:43:57  acolla
62 Correction in StoreOCDB; change of text in mail to detector expert
63
64 Revision 1.38  2007/04/12 08:26:18  jgrosseo
65 updated comment
66
67 Revision 1.37  2007/04/10 16:53:14  jgrosseo
68 redirecting sub detector stdout, stderr to sub detector log file
69
70 Revision 1.35  2007/04/04 16:26:38  acolla
71 1. Re-organization of function calls in TestPreprocessor to make it more meaningful.
72 2. Added missing dependency in test preprocessors.
73 3. in AliShuttle.cxx: processing time and memory consumption info on a single line.
74
75 Revision 1.34  2007/04/04 10:33:36  jgrosseo
76 1) Storing of files to the Grid is now done _after_ your preprocessors succeeded. This is transparent, which means that you can still use the same functions (Store, StoreReferenceData) to store files to the Grid. However, the Shuttle first stores them locally and transfers them after the preprocessor finished. The return code of these two functions has changed from UInt_t to Bool_t which gives you the success of the storing.
77 In case of an error with the Grid, the Shuttle will retry the storing later, the preprocessor does not need to be run again.
78
79 2) The meaning of the return code of the preprocessor has changed. 0 is now success and any other value means failure. This value is stored in the log and you can use it to keep details about the error condition.
80
81 3) New function StoreReferenceFile to _directly_ store a file (without opening it) to the reference storage.
82
83 4) The memory usage of the preprocessor is monitored. If it exceeds 2 GB it is terminated.
84
85 5) New function AliPreprocessor::ProcessDCS(). If you do not need to have DCS data in all cases, you can skip the processing by implemting this function and returning kFALSE under certain conditions. E.g. if there is a certain run type.
86 If you always need DCS data (like before), you do not need to implement it.
87
88 6) The run type has been added to the monitoring page
89
90 Revision 1.33  2007/04/03 13:56:01  acolla
91 Grid Storage at the end of preprocessing. Added virtual method to disable DCS query according to the
92 run type.
93
94 Revision 1.32  2007/02/28 10:41:56  acolla
95 Run type field added in SHUTTLE framework. Run type is read from "run type" logbook and retrieved by
96 AliPreprocessor::GetRunType() function.
97 Added some ldap definition files.
98
99 Revision 1.30  2007/02/13 11:23:21  acolla
100 Moved getters and setters of Shuttle's main OCDB/Reference, local
101 OCDB/Reference, temp and log folders to AliShuttleInterface
102
103 Revision 1.27  2007/01/30 17:52:42  jgrosseo
104 adding monalisa monitoring
105
106 Revision 1.26  2007/01/23 19:20:03  acolla
107 Removed old ldif files, added TOF, MCH ldif files. Added some options in
108 AliShuttleConfig::Print. Added in Ali Shuttle: SetShuttleTempDir and
109 SetShuttleLogDir
110
111 Revision 1.25  2007/01/15 19:13:52  acolla
112 Moved some AliInfo to AliDebug in SendMail function
113
114 Revision 1.21  2006/12/07 08:51:26  jgrosseo
115 update (alberto):
116 table, db names in ldap configuration
117 added GRP preprocessor
118 DCS data can also be retrieved by data point
119
120 Revision 1.20  2006/11/16 16:16:48  jgrosseo
121 introducing strict run ordering flag
122 removed giving preprocessor name to preprocessor, they have to know their name themselves ;-)
123
124 Revision 1.19  2006/11/06 14:23:04  jgrosseo
125 major update (Alberto)
126 o) reading of run parameters from the logbook
127 o) online offline naming conversion
128 o) standalone DCSclient package
129
130 Revision 1.18  2006/10/20 15:22:59  jgrosseo
131 o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
132 o) Merging Collect, CollectAll, CollectNew function
133 o) Removing implementation of empty copy constructors (declaration still there!)
134
135 Revision 1.17  2006/10/05 16:20:55  jgrosseo
136 adapting to new CDB classes
137
138 Revision 1.16  2006/10/05 15:46:26  jgrosseo
139 applying to the new interface
140
141 Revision 1.15  2006/10/02 16:38:39  jgrosseo
142 update (alberto):
143 fixed memory leaks
144 storing of objects that failed to be stored to the grid before
145 interfacing of shuttle status table in daq system
146
147 Revision 1.14  2006/08/29 09:16:05  jgrosseo
148 small update
149
150 Revision 1.13  2006/08/15 10:50:00  jgrosseo
151 effc++ corrections (alberto)
152
153 Revision 1.12  2006/08/08 14:19:29  jgrosseo
154 Update to shuttle classes (Alberto)
155
156 - Possibility to set the full object's path in the Preprocessor's and
157 Shuttle's  Store functions
158 - Possibility to extend the object's run validity in the same classes
159 ("startValidity" and "validityInfinite" parameters)
160 - Implementation of the StoreReferenceData function to store reference
161 data in a dedicated CDB storage.
162
163 Revision 1.11  2006/07/21 07:37:20  jgrosseo
164 last run is stored after each run
165
166 Revision 1.10  2006/07/20 09:54:40  jgrosseo
167 introducing status management: The processing per subdetector is divided into several steps,
168 after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
169 can keep track of the number of failures and skips further processing after a certain threshold is
170 exceeded. These thresholds can be configured in LDAP.
171
172 Revision 1.9  2006/07/19 10:09:55  jgrosseo
173 new configuration, accesst to DAQ FES (Alberto)
174
175 Revision 1.8  2006/07/11 12:44:36  jgrosseo
176 adding parameters for extended validity range of data produced by preprocessor
177
178 Revision 1.7  2006/07/10 14:37:09  jgrosseo
179 small fix + todo comment
180
181 Revision 1.6  2006/07/10 13:01:41  jgrosseo
182 enhanced storing of last sucessfully processed run (alberto)
183
184 Revision 1.5  2006/07/04 14:59:57  jgrosseo
185 revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
186
187 Revision 1.4  2006/06/12 09:11:16  jgrosseo
188 coding conventions (Alberto)
189
190 Revision 1.3  2006/06/06 14:26:40  jgrosseo
191 o) removed files that were moved to STEER
192 o) shuttle updated to follow the new interface (Alberto)
193
194 Revision 1.2  2006/03/07 07:52:34  hristov
195 New version (B.Yordanov)
196
197 Revision 1.6  2005/11/19 17:19:14  byordano
198 RetrieveDATEEntries and RetrieveConditionsData added
199
200 Revision 1.5  2005/11/19 11:09:27  byordano
201 AliShuttle declaration added
202
203 Revision 1.4  2005/11/17 17:47:34  byordano
204 TList changed to TObjArray
205
206 Revision 1.3  2005/11/17 14:43:23  byordano
207 import to local CVS
208
209 Revision 1.1.1.1  2005/10/28 07:33:58  hristov
210 Initial import as subdirectory in AliRoot
211
212 Revision 1.2  2005/09/13 08:41:15  byordano
213 default startTime endTime added
214
215 Revision 1.4  2005/08/30 09:13:02  byordano
216 some docs added
217
218 Revision 1.3  2005/08/29 21:15:47  byordano
219 some docs added
220
221 */
222
223 //
// This class is the main manager for AliShuttle.
// It organizes the data retrieval from DCS and calls the
// interface methods of AliPreprocessor.
// For every detector in AliShuttleConfig (see AliShuttleConfig),
// data for its set of aliases is retrieved. If there is a registered
// AliPreprocessor for this detector then it will be used
// according to the schema (see AliPreprocessor).
// If there isn't a registered AliPreprocessor then the retrieved
// data is stored automatically to the underlying AliCDBStorage.
// The alias name is used for detSpec.
234 //
235
236 #include "AliShuttle.h"
237
238 #include "AliCDBManager.h"
239 #include "AliCDBStorage.h"
240 #include "AliCDBId.h"
241 #include "AliCDBRunRange.h"
242 #include "AliCDBPath.h"
243 #include "AliCDBEntry.h"
244 #include "AliShuttleConfig.h"
245 #include "DCSClient/AliDCSClient.h"
246 #include "AliLog.h"
247 #include "AliPreprocessor.h"
248 #include "AliShuttleStatus.h"
249 #include "AliShuttleLogbookEntry.h"
250
251 #include <TSystem.h>
252 #include <TObject.h>
253 #include <TString.h>
254 #include <TTimeStamp.h>
255 #include <TObjString.h>
256 #include <TSQLServer.h>
257 #include <TSQLResult.h>
258 #include <TSQLRow.h>
259 #include <TMutex.h>
260 #include <TSystemDirectory.h>
261 #include <TSystemFile.h>
262 #include <TFile.h>
263 #include <TFileMerger.h>
264 #include <TGrid.h>
265 #include <TGridResult.h>
266
267 #include <TMonaLisaWriter.h>
268
269 #include <fstream>
270
271 #include <sys/types.h>
272 #include <sys/wait.h>
273
274 ClassImp(AliShuttle)
275
276 //______________________________________________________________________________________________
277 AliShuttle::AliShuttle(const AliShuttleConfig* config,
278                 UInt_t timeout, Int_t retries):
279 fConfig(config),
280 fTimeout(timeout), fRetries(retries),
281 fPreprocessorMap(),
282 fLogbookEntry(0),
283 fCurrentDetector(),
284 fStatusEntry(0),
285 fMonitoringMutex(0),
286 fLastActionTime(0),
287 fLastAction(),
288 fMonaLisa(0),
289 fTestMode(kNone),
290 fReadTestMode(kFALSE),
291 fOutputRedirected(kFALSE)
292 {
293         //
294         // config: AliShuttleConfig used
295         // timeout: timeout used for AliDCSClient connection
296         // retries: the number of retries in case of connection error.
297         //
298
299         if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
300         for(int iSys=0;iSys<4;iSys++) {
301                 fServer[iSys]=0;
302                 if (iSys < 3)
303                         fFXSlist[iSys].SetOwner(kTRUE);
304         }
305         fPreprocessorMap.SetOwner(kTRUE);
306
307         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
308                 fFirstUnprocessed[iDet] = kFALSE;
309
310         fMonitoringMutex = new TMutex();
311 }
312
313 //______________________________________________________________________________________________
314 AliShuttle::~AliShuttle()
315 {
316         //
317         // destructor
318         //
319
320         fPreprocessorMap.DeleteAll();
321         for(int iSys=0;iSys<4;iSys++)
322                 if(fServer[iSys]) {
323                         fServer[iSys]->Close();
324                         delete fServer[iSys];
325                         fServer[iSys] = 0;
326                 }
327
328         if (fStatusEntry){
329                 delete fStatusEntry;
330                 fStatusEntry = 0;
331         }
332         
333         if (fMonitoringMutex) 
334         {
335                 delete fMonitoringMutex;
336                 fMonitoringMutex = 0;
337         }
338 }
339
340 //______________________________________________________________________________________________
341 void AliShuttle::RegisterPreprocessor(AliPreprocessor* preprocessor)
342 {
343         //
344         // Registers new AliPreprocessor.
345         // It uses GetName() for indentificator of the pre processor.
346         // The pre processor is registered it there isn't any other
347         // with the same identificator (GetName()).
348         //
349
350         const char* detName = preprocessor->GetName();
351         if(GetDetPos(detName) < 0)
352                 AliFatal(Form("********** !!!!! Invalid detector name: %s !!!!! **********", detName));
353
354         if (fPreprocessorMap.GetValue(detName)) {
355                 AliWarning(Form("AliPreprocessor %s is already registered!", detName));
356                 return;
357         }
358
359         fPreprocessorMap.Add(new TObjString(detName), preprocessor);
360 }
361 //______________________________________________________________________________________________
362 Bool_t AliShuttle::Store(const AliCDBPath& path, TObject* object,
363                 AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite)
364 {
365         // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for
366         // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored
367         // using this function. Use StoreReferenceData instead!
368         // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
369         // finishes the data are transferred to the main storage (Grid).
370
371         return StoreLocally(fgkLocalCDB, path, object, metaData, validityStart, validityInfinite);
372 }
373
374 //______________________________________________________________________________________________
375 Bool_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData)
376 {
377         // Stores a CDB object in the storage for reference data. This objects will not be available during
378         // offline reconstrunction. Use this function for reference data only!
379         // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
380         // finishes the data are transferred to the main storage (Grid).
381
382         return StoreLocally(fgkLocalRefStorage, path, object, metaData);
383 }
384
385 //______________________________________________________________________________________________
386 Bool_t AliShuttle::StoreLocally(const TString& localUri,
387                         const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
388                         Int_t validityStart, Bool_t validityInfinite)
389 {
390         // Store object temporarily in local storage. Parameters are passed by Store and StoreReferenceData functions.
391         // when the preprocessor finishes the data are transferred to the main storage (Grid).
392         // The parameters are:
393         //   1) Uri of the backup storage (Local)
394         //   2) the object's path.
395         //   3) the object to be stored
396         //   4) the metaData to be associated with the object
397         //   5) the validity start run number w.r.t. the current run,
398         //      if the data is valid only for this run leave the default 0
399         //   6) specifies if the calibration data is valid for infinity (this means until updated),
400         //      typical for calibration runs, the default is kFALSE
401         //
402         // returns 0 if fail, 1 otherwise
403
404         if (fTestMode & kErrorStorage)
405         {
406                 Log(fCurrentDetector, "StoreLocally - In TESTMODE - Simulating error while storing locally");
407                 return kFALSE;
408         }
409         
410         const char* cdbType = (localUri == fgkLocalCDB) ? "CDB" : "Reference";
411
412         Int_t firstRun = GetCurrentRun() - validityStart;
413         if(firstRun < 0) {
414                 AliWarning("First valid run happens to be less than 0! Setting it to 0.");
415                 firstRun=0;
416         }
417
418         Int_t lastRun = -1;
419         if(validityInfinite) {
420                 lastRun = AliCDBRunRange::Infinity();
421         } else {
422                 lastRun = GetCurrentRun();
423         }
424
425         // Version is set to current run, it will be used later to transfer data to Grid
426         AliCDBId id(path, firstRun, lastRun, GetCurrentRun(), -1);
427
428         if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){
429                 TObjString runUsed = Form("%d", GetCurrentRun());
430                 metaData->SetProperty("RunUsed(TObjString)", runUsed.Clone());
431         }
432
433         Bool_t result = kFALSE;
434
435         if (!(AliCDBManager::Instance()->GetStorage(localUri))) {
436                 Log("SHUTTLE", Form("StoreLocally - Cannot activate local %s storage", cdbType));
437         } else {
438                 result = AliCDBManager::Instance()->GetStorage(localUri)
439                                         ->Put(object, id, metaData);
440         }
441
442         if(!result) {
443
444                 Log(fCurrentDetector, Form("StoreLocally - Can't store object <%s>!", id.ToString().Data()));
445         }
446
447         return result;
448 }
449
450 //______________________________________________________________________________________________
451 Bool_t AliShuttle::StoreOCDB()
452 {
453         //
454         // Called when preprocessor ends successfully or when previous storage attempt failed (kStoreError status)
455         // Calls underlying StoreOCDB(const char*) function twice, for OCDB and Reference storage.
456         // Then calls StoreRefFilesToGrid to store reference files. 
457         //
458         
459         if (fTestMode & kErrorGrid)
460         {
461                 Log("SHUTTLE", "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
462                 Log(fCurrentDetector, "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
463                 return kFALSE;
464         }
465         
466         Log("SHUTTLE","Storing OCDB data ...");
467         Bool_t resultCDB = StoreOCDB(fgkMainCDB);
468
469         Log("SHUTTLE","Storing reference data ...");
470         Bool_t resultRef = StoreOCDB(fgkMainRefStorage);
471         
472         Log("SHUTTLE","Storing reference files ...");
473         Bool_t resultRefFiles = StoreRefFilesToGrid();
474         
475         return resultCDB && resultRef && resultRefFiles;
476 }
477
478 //______________________________________________________________________________________________
479 Bool_t AliShuttle::StoreOCDB(const TString& gridURI)
480 {
481         //
482         // Called by StoreOCDB(), performs actual storage to the main OCDB and reference storages (Grid)
483         //
484
485         TObjArray* gridIds=0;
486
487         Bool_t result = kTRUE;
488
489         const char* type = 0;
490         TString localURI;
491         if(gridURI == fgkMainCDB) {
492                 type = "OCDB";
493                 localURI = fgkLocalCDB;
494         } else if(gridURI == fgkMainRefStorage) {
495                 type = "reference";
496                 localURI = fgkLocalRefStorage;
497         } else {
498                 AliError(Form("Invalid storage URI: %s", gridURI.Data()));
499                 return kFALSE;
500         }
501
502         AliCDBManager* man = AliCDBManager::Instance();
503
504         AliCDBStorage *gridSto = man->GetStorage(gridURI);
505         if(!gridSto) {
506                 Log("SHUTTLE",
507                         Form("StoreOCDB - cannot activate main %s storage", type));
508                 return kFALSE;
509         }
510
511         gridIds = gridSto->GetQueryCDBList();
512
513         // get objects previously stored in local CDB
514         AliCDBStorage *localSto = man->GetStorage(localURI);
515         if(!localSto) {
516                 Log("SHUTTLE",
517                         Form("StoreOCDB - cannot activate local %s storage", type));
518                 return kFALSE;
519         }
520         AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*");
521         // Local objects were stored with current run as Grid version!
522         TList* localEntries = localSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun());
523         localEntries->SetOwner(1);
524
525         // loop on local stored objects
526         TIter localIter(localEntries);
527         AliCDBEntry *aLocEntry = 0;
528         while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){
529                 aLocEntry->SetOwner(1);
530                 AliCDBId aLocId = aLocEntry->GetId();
531                 aLocEntry->SetVersion(-1);
532                 aLocEntry->SetSubVersion(-1);
533
534                 // If local object is valid up to infinity we store it only if it is
535                 // the first unprocessed run!
536                 if (aLocId.GetLastRun() == AliCDBRunRange::Infinity() &&
537                         !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
538                 {
539                         Log("SHUTTLE", Form("StoreOCDB - %s: object %s has validity infinite but "
540                                                 "there are previous unprocessed runs!",
541                                                 fCurrentDetector.Data(), aLocId.GetPath().Data()));
542                         continue;
543                 }
544
545                 // loop on Grid valid Id's
546                 Bool_t store = kTRUE;
547                 TIter gridIter(gridIds);
548                 AliCDBId* aGridId = 0;
549                 while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
550                         if(aGridId->GetPath() != aLocId.GetPath()) continue;
551                         // skip all objects valid up to infinity
552                         if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
553                         // if we get here, it means there's already some more recent object stored on Grid!
554                         store = kFALSE;
555                         break;
556                 }
557
558                 // If we get here, the file can be stored!
559                 Bool_t storeOk = gridSto->Put(aLocEntry);
560                 if(!store || storeOk){
561
562                         if (!store)
563                         {
564                                 Log(fCurrentDetector.Data(),
565                                         Form("StoreOCDB - A more recent object already exists in %s storage: <%s>",
566                                                 type, aGridId->ToString().Data()));
567                         } else {
568                                 Log("SHUTTLE",
569                                         Form("StoreOCDB - Object <%s> successfully put into %s storage",
570                                                 aLocId.ToString().Data(), type));
571                                 Log(fCurrentDetector.Data(),
572                                         Form("StoreOCDB - Object <%s> successfully put into %s storage",
573                                                 aLocId.ToString().Data(), type));
574                         }
575
576                         // removing local filename...
577                         TString filename;
578                         localSto->IdToFilename(aLocId, filename);
579                         AliInfo(Form("Removing local file %s", filename.Data()));
580                         RemoveFile(filename.Data());
581                         continue;
582                 } else  {
583                         Log("SHUTTLE",
584                                 Form("StoreOCDB - Grid %s storage of object <%s> failed",
585                                         type, aLocId.ToString().Data()));
586                         Log(fCurrentDetector.Data(),
587                                 Form("StoreOCDB - Grid %s storage of object <%s> failed",
588                                         type, aLocId.ToString().Data()));
589                         result = kFALSE;
590                 }
591         }
592         localEntries->Clear();
593
594         return result;
595 }
596
597 //______________________________________________________________________________________________
598 Bool_t AliShuttle::CleanReferenceStorage(const char* detector)
599 {
600         // clears the directory used to store reference files of a given subdetector
601   
602         AliCDBManager* man = AliCDBManager::Instance();
603         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
604         TString localBaseFolder = sto->GetBaseFolder();
605
606         TString targetDir = GetRefFilePrefix(localBaseFolder.Data(), detector);
607         
608         Log("SHUTTLE", Form("Cleaning %s", targetDir.Data()));
609
610         TString begin;
611         begin.Form("%d_", GetCurrentRun());
612         
613         TSystemDirectory* baseDir = new TSystemDirectory("/", targetDir);
614         if (!baseDir)
615                 return kTRUE;
616                 
617         TList* dirList = baseDir->GetListOfFiles();
618         delete baseDir;
619         
620         if (!dirList) return kTRUE;
621                         
622         if (dirList->GetEntries() < 3) 
623         {
624                 delete dirList;
625                 return kTRUE;
626         }
627                                 
628         Int_t nDirs = 0, nDel = 0;
629         TIter dirIter(dirList);
630         TSystemFile* entry = 0;
631
632         Bool_t success = kTRUE;
633         
634         while ((entry = dynamic_cast<TSystemFile*> (dirIter.Next())))
635         {                                       
636                 if (entry->IsDirectory())
637                         continue;
638                 
639                 TString fileName(entry->GetName());
640                 if (!fileName.BeginsWith(begin))
641                         continue;
642                         
643                 nDirs++;
644                                                 
645                 // delete file
646                 Int_t result = gSystem->Unlink(fileName.Data());
647                 
648                 if (result)
649                 {
650                         Log("SHUTTLE", Form("Could not delete file %s!", fileName.Data()));
651                         success = kFALSE;
652                 } else {
653                         nDel++;
654                 }
655         }
656
657         if(nDirs > 0)
658                 Log("SHUTTLE", Form("CleanReferenceStorage - %d (over %d) reference files in folder %s were deleted.", 
659                         nDel, nDirs, targetDir.Data()));
660
661                 
662         delete dirList;
663         return success;
664
665
666
667
668
669
670   Int_t result = gSystem->GetPathInfo(targetDir, 0, (Long64_t*) 0, 0, 0);
671   if (result == 0)
672   {
673     // delete directory
674     result = gSystem->Exec(Form("rm -r %s", targetDir.Data()));
675     if (result != 0)
676     {  
677       Log("SHUTTLE", Form("StoreReferenceFile - Could not clear directory %s", targetDir.Data()));
678       return kFALSE;
679     }
680   }
681
682   result = gSystem->mkdir(targetDir, kTRUE);
683   if (result != 0)
684   {
685     Log("SHUTTLE", Form("StoreReferenceFile - Error creating base directory %s", targetDir.Data()));
686     return kFALSE;
687   }
688         
689   return kTRUE;
690 }
691
692 //______________________________________________________________________________________________
693 Bool_t AliShuttle::StoreReferenceFile(const char* detector, const char* localFile, const char* gridFileName)
694 {
695         //
696         // Stores reference file directly (without opening it). This function stores the file locally.
697         //
698         // The file is stored under the following location: 
699         // <base folder of local reference storage>/<DET>/<RUN#>_<gridFileName>
700         // where <gridFileName> is the second parameter given to the function
701         // 
702         
703         if (fTestMode & kErrorStorage)
704         {
705                 Log(fCurrentDetector, "StoreReferenceFile - In TESTMODE - Simulating error while storing locally");
706                 return kFALSE;
707         }
708         
709         AliCDBManager* man = AliCDBManager::Instance();
710         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
711         
712         TString localBaseFolder = sto->GetBaseFolder();
713         
714         TString targetDir = GetRefFilePrefix(localBaseFolder.Data(), detector); 
715         
716         //try to open folder, if does not exist
717         void* dir = gSystem->OpenDirectory(targetDir.Data());
718         if (dir == NULL) {
719                 if (gSystem->mkdir(targetDir.Data(), kTRUE)) {
720                         Log("SHUTTLE", Form("Can't open directory <%s>", targetDir.Data()));
721                         return kFALSE;
722                 }
723
724         } else {
725                 gSystem->FreeDirectory(dir);
726         }
727
728         TString target;
729         target.Form("%s/%d_%s", targetDir.Data(), GetCurrentRun(), gridFileName);
730         
731         Int_t result = gSystem->GetPathInfo(localFile, 0, (Long64_t*) 0, 0, 0);
732         if (result)
733         {
734                 Log("SHUTTLE", Form("StoreReferenceFile - %s does not exist", localFile));
735                 return kFALSE;
736         }
737
738         result = gSystem->CopyFile(localFile, target);
739
740         if (result == 0)
741         {
742                 Log("SHUTTLE", Form("StoreReferenceFile - File %s stored locally to %s", localFile, target.Data()));
743                 return kTRUE;
744         }
745         else
746         {
747                 Log("SHUTTLE", Form("StoreReferenceFile - Could not store file %s to %s!. Error code = %d", 
748                                 localFile, target.Data(), result));
749                 return kFALSE;
750         }       
751 }
752
753 //______________________________________________________________________________________________
754 Bool_t AliShuttle::StoreRefFilesToGrid()
755 {
756         //
757         // Transfers the reference file to the Grid.
758         //
759         // The files are stored under the following location: 
760         // <base folder of reference storage>/<DET>/<RUN#>_<gridFileName>
761         //
762         
763         AliCDBManager* man = AliCDBManager::Instance();
764         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
765         if (!sto)
766                 return kFALSE;
767         TString localBaseFolder = sto->GetBaseFolder();
768                 
769         TString dir = GetRefFilePrefix(localBaseFolder.Data(), fCurrentDetector.Data());
770                 
771         AliCDBStorage* gridSto = man->GetStorage(fgkMainRefStorage);
772         if (!gridSto)
773                 return kFALSE;
774         
775         TString gridBaseFolder = gridSto->GetBaseFolder();
776
777         TString alienDir = GetRefFilePrefix(gridBaseFolder.Data(), fCurrentDetector.Data());
778         
779         TString begin;
780         begin.Form("%d_", GetCurrentRun());
781         
782         TSystemDirectory* baseDir = new TSystemDirectory("/", dir);
783         if (!baseDir)
784                 return kTRUE;
785                 
786         TList* dirList = baseDir->GetListOfFiles();
787         delete baseDir;
788         
789         if (!dirList) return kTRUE;
790                 
791         if (dirList->GetEntries() < 3) 
792         {
793                 delete dirList;
794                 return kTRUE;
795         }
796                         
797         if (!gGrid)
798         { 
799                 Log("SHUTTLE", "Connection to Grid failed: Cannot continue!");
800                 delete dirList;
801                 return kFALSE;
802         }
803         
804         Int_t nDirs = 0, nTransfer = 0;
805         TIter dirIter(dirList);
806         TSystemFile* entry = 0;
807
808         Bool_t success = kTRUE;
809         Bool_t first = kTRUE;
810         
811         while ((entry = dynamic_cast<TSystemFile*> (dirIter.Next())))
812         {                       
813                 if (entry->IsDirectory())
814                         continue;
815                         
816                 TString fileName(entry->GetName());
817                 if (!fileName.BeginsWith(begin))
818                         continue;
819                         
820                 nDirs++;
821                         
822                 if (first)
823                 {
824                         first = kFALSE;
825                         // check that DET folder exists, otherwise create it
826                         TGridResult* result = gGrid->Ls(alienDir.Data(), "a");
827                         
828                         if (!result)
829                         {
830                                 delete dirList;
831                                 return kFALSE;
832                         }
833                         
834                         if (!result->GetFileName(1)) // TODO: It looks like element 0 is always 0!!
835                         {
836                                 if (!gGrid->Mkdir(alienDir.Data(),"",0))
837                                 {
838                                         Log("SHUTTLE", Form("StoreRefFilesToGrid - Cannot create directory %s",
839                                                         alienDir.Data()));
840                                         delete dirList;
841                                         return kFALSE;
842                                 } else {
843                                         Log("SHUTTLE",Form("Folder %s created", alienDir.Data()));
844                                 }
845                                 
846                         } else {
847                                         Log("SHUTTLE",Form("Folder %s found", alienDir.Data()));
848                         }
849                 }
850                         
851                 TString fullLocalPath;
852                 fullLocalPath.Form("%s/%s", dir.Data(), fileName.Data());
853                 
854                 TString fullGridPath;
855                 fullGridPath.Form("alien://%s/%s", alienDir.Data(), fileName.Data());
856
857                 TFileMerger fileMerger;
858                 Bool_t result = TFile::Cp(fullLocalPath, fullGridPath);
859                 
860                 if (result)
861                 {
862                         Log("SHUTTLE", Form("StoreRefFilesToGrid - Copying local file %s to %s succeeded!", fullLocalPath.Data(), fullGridPath.Data()));
863                         RemoveFile(fullLocalPath);
864                         nTransfer++;
865                 }
866                 else
867                 {
868                         Log("SHUTTLE", Form("StoreRefFilesToGrid - Copying local file %s to %s FAILED!", fullLocalPath.Data(), fullGridPath.Data()));
869                         success = kFALSE;
870                 }
871         }
872
873         Log("SHUTTLE", Form("StoreRefFilesToGrid - %d (over %d) reference files in folder %s copied to Grid.", nTransfer, nDirs, dir.Data()));
874
875                 
876         delete dirList;
877         return success;
878 }
879
880 //______________________________________________________________________________________________
881 const char* AliShuttle::GetRefFilePrefix(const char* base, const char* detector)
882 {
883         //
884         // Get folder name of reference files 
885         //
886
887         TString offDetStr(GetOfflineDetName(detector));
888         TString dir;
889         if (offDetStr == "ITS" || offDetStr == "MUON" || offDetStr == "PHOS")
890         {
891                 dir.Form("%s/%s/%s", base, offDetStr.Data(), detector);
892         } else {
893                 dir.Form("%s/%s", base, offDetStr.Data());
894         }
895         
896         return dir.Data();
897         
898
899 }
900 //______________________________________________________________________________________________
901 void AliShuttle::CleanLocalStorage(const TString& uri)
902 {
903         //
904         // Called in case the preprocessor is declared failed. Remove remaining objects from the local storages.
905         //
906
907         const char* type = 0;
908         if(uri == fgkLocalCDB) {
909                 type = "OCDB";
910         } else if(uri == fgkLocalRefStorage) {
911                 type = "Reference";
912         } else {
913                 AliError(Form("Invalid storage URI: %s", uri.Data()));
914                 return;
915         }
916
917         AliCDBManager* man = AliCDBManager::Instance();
918
919         // open local storage
920         AliCDBStorage *localSto = man->GetStorage(uri);
921         if(!localSto) {
922                 Log("SHUTTLE",
923                         Form("CleanLocalStorage - cannot activate local %s storage", type));
924                 return;
925         }
926
927         TString filename(Form("%s/%s/*/Run*_v%d_s*.root",
928                 localSto->GetBaseFolder().Data(), GetOfflineDetName(fCurrentDetector.Data()), GetCurrentRun()));
929
930         AliInfo(Form("filename = %s", filename.Data()));
931
932         AliInfo(Form("Removing remaining local files from run %d and detector %s ...",
933                 GetCurrentRun(), fCurrentDetector.Data()));
934
935         RemoveFile(filename.Data());
936
937 }
938
939 //______________________________________________________________________________________________
940 void AliShuttle::RemoveFile(const char* filename)
941 {
942         //
943         // removes local file
944         //
945
946         TString command(Form("rm -f %s", filename));
947
948         Int_t result = gSystem->Exec(command.Data());
949         if(result != 0)
950         {
951                 Log("SHUTTLE", Form("RemoveFile - %s: Cannot remove file %s!",
952                         fCurrentDetector.Data(), filename));
953         }
954 }
955
956 //______________________________________________________________________________________________
957 AliShuttleStatus* AliShuttle::ReadShuttleStatus()
958 {
959         //
960         // Reads the AliShuttleStatus from the CDB
961         //
962
963         if (fStatusEntry){
964                 delete fStatusEntry;
965                 fStatusEntry = 0;
966         }
967
968         fStatusEntry = AliCDBManager::Instance()->GetStorage(GetLocalCDB())
969                 ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
970
971         if (!fStatusEntry) return 0;
972         fStatusEntry->SetOwner(1);
973
974         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
975         if (!status) {
976                 AliError("Invalid object stored to CDB!");
977                 return 0;
978         }
979
980         return status;
981 }
982
983 //______________________________________________________________________________________________
984 Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status)
985 {
986         //
987         // writes the status for one subdetector
988         //
989
990         if (fStatusEntry){
991                 delete fStatusEntry;
992                 fStatusEntry = 0;
993         }
994
995         Int_t run = GetCurrentRun();
996
997         AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
998
999         fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
1000         fStatusEntry->SetOwner(1);
1001
1002         UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
1003
1004         if (!result) {
1005                 Log("SHUTTLE", Form("WriteShuttleStatus - Failed for %s, run %d",
1006                                                 fCurrentDetector.Data(), run));
1007                 return kFALSE;
1008         }
1009         
1010         SendMLInfo();
1011
1012         return kTRUE;
1013 }
1014
1015 //______________________________________________________________________________________________
1016 void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount)
1017 {
1018         //
1019         // changes the AliShuttleStatus for the given detector and run to the given status
1020         //
1021
1022         if (!fStatusEntry){
1023                 AliError("UNEXPECTED: fStatusEntry empty");
1024                 return;
1025         }
1026
1027         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1028
1029         if (!status){
1030                 Log("SHUTTLE", "UNEXPECTED: status could not be read from current CDB entry");
1031                 return;
1032         }
1033
1034         TString actionStr = Form("UpdateShuttleStatus - %s: Changing state from %s to %s",
1035                                 fCurrentDetector.Data(),
1036                                 status->GetStatusName(),
1037                                 status->GetStatusName(newStatus));
1038         Log("SHUTTLE", actionStr);
1039         SetLastAction(actionStr);
1040
1041         status->SetStatus(newStatus);
1042         if (increaseCount) status->IncreaseCount();
1043
1044         AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
1045
1046         SendMLInfo();
1047 }
1048
1049 //______________________________________________________________________________________________
1050 void AliShuttle::SendMLInfo()
1051 {
1052         //
1053         // sends ML information about the current status of the current detector being processed
1054         //
1055         
1056         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1057         
1058         if (!status){
1059                 Log("SHUTTLE", "SendMLInfo - UNEXPECTED: status could not be read from current CDB entry");
1060                 return;
1061         }
1062         
1063         TMonaLisaText  mlStatus(Form("%s_status", fCurrentDetector.Data()), status->GetStatusName());
1064         TMonaLisaValue mlRetryCount(Form("%s_count", fCurrentDetector.Data()), status->GetCount());
1065
1066         TList mlList;
1067         mlList.Add(&mlStatus);
1068         mlList.Add(&mlRetryCount);
1069
1070         fMonaLisa->SendParameters(&mlList);
1071 }
1072
1073 //______________________________________________________________________________________________
1074 Bool_t AliShuttle::ContinueProcessing()
1075 {
1076         // this function reads the AliShuttleStatus information from CDB and
1077         // checks if the processing should be continued
1078         // if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
1079
1080         if (!fConfig->HostProcessDetector(fCurrentDetector)) return kFALSE;
1081
1082         AliPreprocessor* aPreprocessor =
1083                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
1084         if (!aPreprocessor)
1085         {
1086                 AliInfo(Form("%s: no preprocessor registered", fCurrentDetector.Data()));
1087                 return kFALSE;
1088         }
1089
1090         AliShuttleLogbookEntry::Status entryStatus =
1091                 fLogbookEntry->GetDetectorStatus(fCurrentDetector);
1092
1093         if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
1094                 AliInfo(Form("ContinueProcessing - %s is %s",
1095                                 fCurrentDetector.Data(),
1096                                 fLogbookEntry->GetDetectorStatusName(entryStatus)));
1097                 return kFALSE;
1098         }
1099
1100         // if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state
1101
1102         // check if current run is first unprocessed run for current detector
1103         if (fConfig->StrictRunOrder(fCurrentDetector) &&
1104                 !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
1105         {
1106                 if (fTestMode == kNone)
1107                 {
1108                         Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering but this is not the first unprocessed run!"));
1109                         return kFALSE;
1110                 }
1111                 else
1112                 {
1113                         Log("SHUTTLE", Form("ContinueProcessing - In TESTMODE - Although %s requires strict run ordering and this is not the first unprocessed run, the SHUTTLE continues"));
1114                 }
1115         }
1116
1117         AliShuttleStatus* status = ReadShuttleStatus();
1118         if (!status) {
1119                 // first time
1120                 Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
1121                                 fCurrentDetector.Data()));
1122                 status = new AliShuttleStatus(AliShuttleStatus::kStarted);
1123                 return WriteShuttleStatus(status);
1124         }
1125
1126         // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
1127         // If it happens it may mean Logbook updating failed... let's do it now!
1128         if (status->GetStatus() == AliShuttleStatus::kDone ||
1129             status->GetStatus() == AliShuttleStatus::kFailed){
1130                 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
1131                                         fCurrentDetector.Data(),
1132                                         status->GetStatusName(status->GetStatus())));
1133                 UpdateShuttleLogbook(fCurrentDetector.Data(),
1134                                         status->GetStatusName(status->GetStatus()));
1135                 return kFALSE;
1136         }
1137
1138         if (status->GetStatus() == AliShuttleStatus::kStoreError) {
1139                 Log("SHUTTLE",
1140                         Form("ContinueProcessing - %s: Grid storage of one or more objects failed. Trying again now",
1141                                 fCurrentDetector.Data()));
1142                 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
1143                 if (StoreOCDB()){
1144                         Log("SHUTTLE", Form("ContinueProcessing - %s: all objects successfully stored into main storage",
1145                                 fCurrentDetector.Data()));
1146                         UpdateShuttleStatus(AliShuttleStatus::kDone);
1147                         UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE");
1148                 } else {
1149                         Log("SHUTTLE",
1150                                 Form("ContinueProcessing - %s: Grid storage failed again",
1151                                         fCurrentDetector.Data()));
1152                         UpdateShuttleStatus(AliShuttleStatus::kStoreError);
1153                 }
1154                 return kFALSE;
1155         }
1156
1157         // if we get here, there is a restart
1158         Bool_t cont = kFALSE;
1159
1160         // abort conditions
1161         if (status->GetCount() >= fConfig->GetMaxRetries()) {
1162                 Log("SHUTTLE", Form("ContinueProcessing - %s failed %d times in status %s - "
1163                                 "Updating Shuttle Logbook", fCurrentDetector.Data(),
1164                                 status->GetCount(), status->GetStatusName()));
1165                 UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
1166                 UpdateShuttleStatus(AliShuttleStatus::kFailed);
1167
1168                 // there may still be objects in local OCDB and reference storage
1169                 // and FXS databases may be not updated: do it now!
1170                 
1171                 // TODO Currently disabled, we want to keep files in case of failure!
1172                 // CleanLocalStorage(fgkLocalCDB);
1173                 // CleanLocalStorage(fgkLocalRefStorage);
1174                 // UpdateTableFailCase();
1175                 
1176                 // Send mail to detector expert!
1177                 AliInfo(Form("Sending mail to %s expert...", fCurrentDetector.Data()));
1178                 if (!SendMail())
1179                         Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert",
1180                                         fCurrentDetector.Data()));
1181
1182         } else {
1183                 Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. "
1184                                 "Aborted before with %s. Retry number %d.", fCurrentDetector.Data(),
1185                                 status->GetStatusName(), status->GetCount()));
1186                 Bool_t increaseCount = kTRUE;
1187                 if (status->GetStatus() == AliShuttleStatus::kDCSError || status->GetStatus() == AliShuttleStatus::kDCSStarted)
1188                         increaseCount = kFALSE;
1189                 UpdateShuttleStatus(AliShuttleStatus::kStarted, increaseCount);
1190                 cont = kTRUE;
1191         }
1192
1193         return cont;
1194 }
1195
1196 //______________________________________________________________________________________________
1197 Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
1198 {
1199         //
1200         // Makes data retrieval for all detectors in the configuration.
1201         // entry: Shuttle logbook entry, contains run paramenters and status of detectors
1202         // (Unprocessed, Inactive, Failed or Done).
1203         // Returns kFALSE in case of error occured and kTRUE otherwise
1204         //
1205
1206         if (!entry) return kFALSE;
1207
1208         fLogbookEntry = entry;
1209
1210         AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^* \n",
1211                                         GetCurrentRun()));
1212
1213         // create ML instance that monitors this run
1214         fMonaLisa = new TMonaLisaWriter(Form("%d", GetCurrentRun()), "SHUTTLE", "aliendb1.cern.ch");
1215         // disable monitoring of other parameters that come e.g. from TFile
1216         gMonitoringWriter = 0;
1217
1218         // Send the information to ML
1219         TMonaLisaText  mlStatus("SHUTTLE_status", "Processing");
1220         TMonaLisaText  mlRunType("SHUTTLE_runtype", Form("%s (%s)", entry->GetRunType(), entry->GetRunParameter("log")));
1221
1222         TList mlList;
1223         mlList.Add(&mlStatus);
1224         mlList.Add(&mlRunType);
1225
1226         fMonaLisa->SendParameters(&mlList);
1227
1228         if (fLogbookEntry->IsDone())
1229         {
1230                 Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook");
1231                 UpdateShuttleLogbook("shuttle_done");
1232                 fLogbookEntry = 0;
1233                 return kTRUE;
1234         }
1235
1236         // read test mode if flag is set
1237         if (fReadTestMode)
1238         {
1239                 fTestMode = kNone;
1240                 TString logEntry(entry->GetRunParameter("log"));
1241                 //printf("log entry = %s\n", logEntry.Data());
1242                 TString searchStr("Testmode: ");
1243                 Int_t pos = logEntry.Index(searchStr.Data());
1244                 //printf("%d\n", pos);
1245                 if (pos >= 0)
1246                 {
1247                         TSubString subStr = logEntry(pos + searchStr.Length(), logEntry.Length());
1248                         //printf("%s\n", subStr.String().Data());
1249                         TString newStr(subStr.Data());
1250                         TObjArray* token = newStr.Tokenize(' ');
1251                         if (token)
1252                         {
1253                                 //token->Print();
1254                                 TObjString* tmpStr = dynamic_cast<TObjString*> (token->First());
1255                                 if (tmpStr)
1256                                 {
1257                                         Int_t testMode = tmpStr->String().Atoi();
1258                                         if (testMode > 0)
1259                                         {
1260                                                 Log("SHUTTLE", Form("Enabling test mode %d", testMode));
1261                                                 SetTestMode((TestMode) testMode);
1262                                         }
1263                                 }
1264                                 delete token;          
1265                         }
1266                 }
1267         }
1268         
1269         Log("SHUTTLE", Form("The test mode flag is %d", (Int_t) fTestMode));
1270         
1271         fLogbookEntry->Print("all");
1272
1273         // Initialization
1274         Bool_t hasError = kFALSE;
1275
1276         AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
1277         if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
1278         AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
1279         if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
1280
1281         // Loop on detectors in the configuration
1282         TIter iter(fConfig->GetDetectors());
1283         TObjString* aDetector = 0;
1284
1285         while ((aDetector = (TObjString*) iter.Next()))
1286         {
1287                 fCurrentDetector = aDetector->String();
1288
1289                 if (ContinueProcessing() == kFALSE) continue;
1290
1291                 AliInfo(Form("\n\n \t\t\t****** run %d - %s: START  ******",
1292                                                 GetCurrentRun(), aDetector->GetName()));
1293
1294                 for(Int_t iSys=0;iSys<3;iSys++) fFXSCalled[iSys]=kFALSE;
1295
1296                 Log(fCurrentDetector.Data(), "Starting processing");
1297
1298                 Int_t pid = fork();
1299
1300                 if (pid < 0)
1301                 {
1302                         Log("SHUTTLE", "ERROR: Forking failed");
1303                 }
1304                 else if (pid > 0)
1305                 {
1306                         // parent
1307                         AliInfo(Form("In parent process of %d - %s: Starting monitoring",
1308                                                         GetCurrentRun(), aDetector->GetName()));
1309
1310                         Long_t begin = time(0);
1311
1312                         int status; // to be used with waitpid, on purpose an int (not Int_t)!
1313                         while (waitpid(pid, &status, WNOHANG) == 0)
1314                         {
1315                                 Long_t expiredTime = time(0) - begin;
1316
1317                                 if (expiredTime > fConfig->GetPPTimeOut())
1318                                 {
1319                                         TString tmp;
1320                                         tmp.Form("Process of %s time out. Run time: %d seconds. Killing...",
1321                                                                 fCurrentDetector.Data(), expiredTime);
1322                                         Log("SHUTTLE", tmp);
1323                                         Log(fCurrentDetector, tmp);
1324
1325                                         kill(pid, 9);
1326
1327                                         UpdateShuttleStatus(AliShuttleStatus::kPPTimeOut);
1328                                         hasError = kTRUE;
1329
1330                                         gSystem->Sleep(1000);
1331                                 }
1332                                 else
1333                                 {
1334                                         gSystem->Sleep(1000);
1335                                         
1336                                         TString checkStr;
1337                                         checkStr.Form("ps -o vsize --pid %d | tail -n 1", pid);
1338                                         FILE* pipe = gSystem->OpenPipe(checkStr, "r");
1339                                         if (!pipe)
1340                                         {
1341                                                 Log("SHUTTLE", Form("Error: Could not open pipe to %s", checkStr.Data()));
1342                                                 continue;
1343                                         }
1344                                                 
1345                                         char buffer[100];
1346                                         if (!fgets(buffer, 100, pipe))
1347                                         {
1348                                                 Log("SHUTTLE", "Error: ps did not return anything");
1349                                                 gSystem->ClosePipe(pipe);
1350                                                 continue;
1351                                         }
1352                                         gSystem->ClosePipe(pipe);
1353                                         
1354                                         //Log("SHUTTLE", Form("ps returned %s", buffer));
1355                                         
1356                                         Int_t mem = 0;
1357                                         if ((sscanf(buffer, "%d\n", &mem) != 1) || !mem)
1358                                         {
1359                                                 Log("SHUTTLE", "Error: Could not parse output of ps");
1360                                                 continue;
1361                                         }
1362                                         
1363                                         if (expiredTime % 60 == 0)
1364                                                 Log("SHUTTLE", Form("%s: Checking process. Run time: %d seconds - Memory consumption: %d KB",
1365                                                                 fCurrentDetector.Data(), expiredTime, mem));
1366                                         
1367                                         if (mem > fConfig->GetPPMaxMem())
1368                                         {
1369                                                 TString tmp;
1370                                                 tmp.Form("Process exceeds maximum allowed memory (%d KB > %d KB). Killing...",
1371                                                         mem, fConfig->GetPPMaxMem());
1372                                                 Log("SHUTTLE", tmp);
1373                                                 Log(fCurrentDetector, tmp);
1374         
1375                                                 kill(pid, 9);
1376         
1377                                                 UpdateShuttleStatus(AliShuttleStatus::kPPOutOfMemory);
1378                                                 hasError = kTRUE;
1379         
1380                                                 gSystem->Sleep(1000);
1381                                         }
1382                                 }
1383                         }
1384
1385                         AliInfo(Form("In parent process of %d - %s: Client has terminated.",
1386                                                                 GetCurrentRun(), aDetector->GetName()));
1387
1388                         if (WIFEXITED(status))
1389                         {
1390                                 Int_t returnCode = WEXITSTATUS(status);
1391
1392                                 Log("SHUTTLE", Form("%s: the return code is %d", fCurrentDetector.Data(),
1393                                                                                 returnCode));
1394
1395                                 if (returnCode == 0) hasError = kTRUE;
1396                         }
1397                 }
1398                 else if (pid == 0)
1399                 {
1400                         // client
1401                         AliInfo(Form("In client process of %d - %s", GetCurrentRun(), aDetector->GetName()));
1402
1403                         AliInfo("Redirecting output...");
1404
1405                         if ((freopen(GetLogFileName(fCurrentDetector), "a", stdout)) == 0)
1406                         {
1407                                 Log("SHUTTLE", "Could not freopen stdout");
1408                         }
1409                         else
1410                         {
1411                                 fOutputRedirected = kTRUE;
1412                                 if ((dup2(fileno(stdout), fileno(stderr))) < 0)
1413                                         Log("SHUTTLE", "Could not redirect stderr");
1414                                 
1415                         }
1416                         
1417                         TString wd = gSystem->WorkingDirectory();
1418                         TString tmpDir = Form("%s/%s_process",GetShuttleTempDir(),fCurrentDetector.Data());
1419                         
1420                         gSystem->mkdir(tmpDir.Data());
1421                         gSystem->ChangeDirectory(tmpDir.Data());
1422                         
1423                         Bool_t success = ProcessCurrentDetector();
1424                         
1425                         gSystem->ChangeDirectory(wd.Data());
1426                         
1427                         gSystem->Exec(Form("rm -rf %s",tmpDir.Data()));
1428                         
1429                         if (success) // Preprocessor finished successfully!
1430                         { 
1431                                 // Update time_processed field in FXS DB
1432                                 if (UpdateTable() == kFALSE)
1433                                         Log("SHUTTLE", Form("Process - %s: Could not update FXS databases!", 
1434                                                         fCurrentDetector.Data()));
1435
1436                                 // Transfer the data from local storage to main storage (Grid)
1437                                 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
1438                                 if (StoreOCDB() == kFALSE)
1439                                 {
1440                                         AliInfo(Form("\n \t\t\t****** run %d - %s: STORAGE ERROR ****** \n\n",
1441                                                         GetCurrentRun(), aDetector->GetName()));
1442                                         UpdateShuttleStatus(AliShuttleStatus::kStoreError);
1443                                         success = kFALSE;
1444                                 } else {
1445                                         AliInfo(Form("\n \t\t\t****** run %d - %s: DONE ****** \n\n",
1446                                                         GetCurrentRun(), aDetector->GetName()));
1447                                         UpdateShuttleStatus(AliShuttleStatus::kDone);
1448                                         UpdateShuttleLogbook(fCurrentDetector, "DONE");
1449                                 }
1450                         }
1451
1452                         for (UInt_t iSys=0; iSys<3; iSys++)
1453                         {
1454                                 if (fFXSCalled[iSys]) fFXSlist[iSys].Clear();
1455                         }
1456
1457                         AliInfo(Form("Client process of %d - %s is exiting now with %d.",
1458                                                         GetCurrentRun(), aDetector->GetName(), success));
1459
1460                         // the client exits here
1461                         gSystem->Exit(success);
1462
1463                         AliError("We should never get here!!!");
1464                 }
1465         }
1466
1467         AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^* \n",
1468                                                         GetCurrentRun()));
1469
1470         //check if shuttle is done for this run, if so update logbook
1471         TObjArray checkEntryArray;
1472         checkEntryArray.SetOwner(1);
1473         TString whereClause = Form("where run=%d", GetCurrentRun());
1474         if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || checkEntryArray.GetEntries() == 0) {
1475                 Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!",
1476                                                 GetCurrentRun()));
1477                 return hasError == kFALSE;
1478         }
1479
1480         AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*>
1481                                                 (checkEntryArray.At(0));
1482
1483         if (checkEntry)
1484         {
1485                 if (checkEntry->IsDone())
1486                 {
1487                         Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook");
1488                         UpdateShuttleLogbook("shuttle_done");
1489                 }
1490                 else
1491                 {
1492                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1493                         {
1494                                 if (checkEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
1495                                 {
1496                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
1497                                                         checkEntry->GetRun(), GetDetName(iDet)));
1498                                         fFirstUnprocessed[iDet] = kFALSE;
1499                                 }
1500                         }
1501                 }
1502         }
1503
1504         // remove ML instance
1505         delete fMonaLisa;
1506         fMonaLisa = 0;
1507
1508         fLogbookEntry = 0;
1509
1510         return hasError == kFALSE;
1511 }
1512
1513 //______________________________________________________________________________________________
1514 Bool_t AliShuttle::ProcessCurrentDetector()
1515 {
1516         //
1517         // Makes data retrieval just for a specific detector (fCurrentDetector).
1518         // Threre should be a configuration for this detector.
1519
1520         AliInfo(Form("Retrieving values for %s, run %d", fCurrentDetector.Data(), GetCurrentRun()));
1521
1522         if (!CleanReferenceStorage(fCurrentDetector.Data()))
1523                 return kFALSE;
1524
1525         TMap* dcsMap = 0;
1526
1527         // call preprocessor
1528         AliPreprocessor* aPreprocessor =
1529                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
1530
1531         aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
1532
1533         Bool_t processDCS = aPreprocessor->ProcessDCS();
1534
1535         if (!processDCS)
1536         {
1537                 Log(fCurrentDetector, "The preprocessor requested to skip the retrieval of DCS values");
1538         }
1539         else if (fTestMode & kSkipDCS)
1540         {
1541                 Log(fCurrentDetector, "In TESTMODE - Skipping DCS processing!");
1542         } 
1543         else if (fTestMode & kErrorDCS)
1544         {
1545                 Log(fCurrentDetector, "In TESTMODE - Simulating DCS error");
1546                 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
1547                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1548                 return kFALSE;
1549         } else {
1550
1551                 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
1552
1553                 TString host(fConfig->GetDCSHost(fCurrentDetector));
1554                 Int_t port = fConfig->GetDCSPort(fCurrentDetector);
1555
1556                 if (fConfig->GetDCSAliases(fCurrentDetector)->GetEntries() > 0)
1557                 {
1558                         dcsMap = GetValueSet(host, port, fConfig->GetDCSAliases(fCurrentDetector), kAlias);
1559                         if (!dcsMap)
1560                         {
1561                                 Log(fCurrentDetector, "ProcessCurrentDetector - Error while retrieving DCS aliases");
1562                                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1563                                 return kFALSE;
1564                         }
1565                 }
1566                 
1567                 if (fConfig->GetDCSDataPoints(fCurrentDetector)->GetEntries() > 0)
1568                 {
1569                         TMap* dcsMap2 = GetValueSet(host, port, fConfig->GetDCSDataPoints(fCurrentDetector), kDP);
1570                         if (!dcsMap2)
1571                         {
1572                                 Log(fCurrentDetector, "ProcessCurrentDetector - Error while retrieving DCS data points");
1573                                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1574                                 if (dcsMap)
1575                                         delete dcsMap;
1576                                 return kFALSE;
1577                         }
1578                         
1579                         if (!dcsMap)
1580                         {
1581                                 dcsMap = dcsMap2;
1582                         }
1583                         else // merge
1584                         {
1585                                 TIter iter(dcsMap2);
1586                                 TObjString* key = 0;
1587                                 while ((key = (TObjString*) iter.Next()))
1588                                         dcsMap->Add(key, dcsMap2->GetValue(key->String()));
1589                                         
1590                                 dcsMap2->SetOwner(kFALSE);
1591                                 delete dcsMap2;
1592                         }
1593                 }
1594                 
1595         }
1596
1597         // still no map?
1598         if (!dcsMap)
1599                 dcsMap = new TMap;
1600         
1601         // DCS Archive DB processing successful. Call Preprocessor!
1602         UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
1603
1604         UInt_t returnValue = aPreprocessor->Process(dcsMap);
1605
1606         if (returnValue > 0) // Preprocessor error!
1607         {
1608                 Log(fCurrentDetector, Form("Preprocessor failed. Process returned %d.", returnValue));
1609                 UpdateShuttleStatus(AliShuttleStatus::kPPError);
1610                 dcsMap->DeleteAll();
1611                 delete dcsMap;
1612                 return kFALSE;
1613         }
1614         
1615         // preprocessor ok!
1616         UpdateShuttleStatus(AliShuttleStatus::kPPDone);
1617         Log(fCurrentDetector, Form("ProcessCurrentDetector - %s preprocessor returned success",
1618                                 fCurrentDetector.Data()));
1619
1620         dcsMap->DeleteAll();
1621         delete dcsMap;
1622
1623         return kTRUE;
1624 }
1625
1626 //______________________________________________________________________________________________
1627 Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
1628                 TObjArray& entries)
1629 {
1630         // Query DAQ's Shuttle logbook and fills detector status object.
1631         // Call QueryRunParameters to query DAQ logbook for run parameters.
1632         //
1633
1634         entries.SetOwner(1);
1635
1636         // check connection, in case connect
1637         if(!Connect(3)) return kFALSE;
1638
1639         TString sqlQuery;
1640         sqlQuery = Form("select * from %s %s order by run", fConfig->GetShuttlelbTable(), whereClause);
1641
1642         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
1643         if (!aResult) {
1644                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
1645                 return kFALSE;
1646         }
1647
1648         AliDebug(2,Form("Query = %s", sqlQuery.Data()));
1649
1650         if(aResult->GetRowCount() == 0) {
1651                 AliInfo("No entries in Shuttle Logbook match request");
1652                 delete aResult;
1653                 return kTRUE;
1654         }
1655
1656         // TODO Check field count!
1657         const UInt_t nCols = 23;
1658         if (aResult->GetFieldCount() != (Int_t) nCols) {
1659                 AliError("Invalid SQL result field number!");
1660                 delete aResult;
1661                 return kFALSE;
1662         }
1663
1664         TSQLRow* aRow;
1665         while ((aRow = aResult->Next())) {
1666                 TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
1667                 Int_t run = runString.Atoi();
1668
1669                 AliShuttleLogbookEntry *entry = QueryRunParameters(run);
1670                 if (!entry)
1671                         continue;
1672
1673                 // loop on detectors
1674                 for(UInt_t ii = 0; ii < nCols; ii++)
1675                         entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii));
1676
1677                 entries.AddLast(entry);
1678                 delete aRow;
1679         }
1680
1681         delete aResult;
1682         return kTRUE;
1683 }
1684
1685 //______________________________________________________________________________________________
1686 AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
1687 {
1688         //
1689         // Retrieve run parameters written in the DAQ logbook and sets them into AliShuttleLogbookEntry object
1690         //
1691
1692         // check connection, in case connect
1693         if (!Connect(3))
1694                 return 0;
1695
1696         TString sqlQuery;
1697         sqlQuery.Form("select * from %s where run=%d", fConfig->GetDAQlbTable(), run);
1698
1699         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
1700         if (!aResult) {
1701                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
1702                 return 0;
1703         }
1704
1705         if (aResult->GetRowCount() == 0) {
1706                 Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run));
1707                 delete aResult;
1708                 return 0;
1709         }
1710
1711         if (aResult->GetRowCount() > 1) {
1712                 AliError(Form("More than one entry in DAQ Logbook for run %d. Skipping", run));
1713                 delete aResult;
1714                 return 0;
1715         }
1716
1717         TSQLRow* aRow = aResult->Next();
1718         if (!aRow)
1719         {
1720                 AliError(Form("Could not retrieve row for run %d. Skipping", run));
1721                 delete aResult;
1722                 return 0;
1723         }
1724
1725         AliShuttleLogbookEntry* entry = new AliShuttleLogbookEntry(run);
1726
1727         for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
1728                 entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
1729
1730         UInt_t startTime = entry->GetStartTime();
1731         UInt_t endTime = entry->GetEndTime();
1732
1733         if (!startTime || !endTime || startTime > endTime) {
1734                 Log("SHUTTLE",
1735                         Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d",
1736                                 run, startTime, endTime));
1737                 delete entry;
1738                 delete aRow;
1739                 delete aResult;
1740                 return 0;
1741         }
1742
1743         delete aRow;
1744         delete aResult;
1745
1746         return entry;
1747 }
1748
1749 //______________________________________________________________________________________________
1750 Bool_t AliShuttle::GetValueSet(const char* host, Int_t port, const char* entry,
1751                                 TObjArray* valueSet, DCSType type)
1752 {
1753         // Retrieve all "entry" data points from the DCS server
1754         // host, port: TSocket connection parameters
1755         // entry: name of the alias or data point
1756         // valueSet: array of retrieved AliDCSValue's
1757         // type: kAlias or kDP
1758
1759         AliDCSClient client(host, port, fTimeout, fRetries);
1760         if (!client.IsConnected())
1761         {
1762                 return kFALSE;
1763         }
1764
1765         Int_t result=0;
1766
1767         if (type == kAlias)
1768         {
1769                 result = client.GetAliasValues(entry,
1770                         GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1771         } else
1772         if (type == kDP)
1773         {
1774                 result = client.GetDPValues(entry,
1775                         GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1776         }
1777
1778         if (result < 0)
1779         {
1780                 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get '%s'! Reason: %s",
1781                         entry, AliDCSClient::GetErrorString(result)));
1782
1783                 if (result == AliDCSClient::fgkServerError)
1784                 {
1785                         Log(fCurrentDetector.Data(), Form("GetValueSet - Server error: %s",
1786                                 client.GetServerError().Data()));
1787                 }
1788
1789                 return kFALSE;
1790         }
1791
1792         return kTRUE;
1793 }
1794
1795 //______________________________________________________________________________________________
1796 TMap* AliShuttle::GetValueSet(const char* host, Int_t port, const TSeqCollection* entries,
1797                               DCSType type)
1798 {
1799         // Retrieve all "entry" data points from the DCS server
1800         // host, port: TSocket connection parameters
1801         // entries: list of name of the alias or data point
1802         // type: kAlias or kDP
1803         // returns TMap of values, 0 when failure
1804
1805         const Int_t kSplit = 100; // maximum number of DPs at a time
1806         
1807         Int_t totalEntries = entries->GetEntries();
1808         
1809         TMap* result = 0;
1810         
1811         for (Int_t index=0; index < totalEntries; index += kSplit)
1812         {
1813                 Int_t endIndex = index + kSplit;
1814         
1815                 AliDCSClient client(host, port, fTimeout, fRetries);
1816                 if (!client.IsConnected())
1817                         return 0;
1818
1819                 TMap* partialResult = 0;
1820
1821                 if (type == kAlias)
1822                 {
1823                         partialResult = client.GetAliasValues(entries, GetCurrentStartTime(), 
1824                                 GetCurrentEndTime(), index, endIndex);
1825                 } 
1826                 else if (type == kDP)
1827                 {
1828                         partialResult = client.GetDPValues(entries, GetCurrentStartTime(), 
1829                                 GetCurrentEndTime(), index, endIndex);
1830                 }
1831
1832                 if (partialResult == 0)
1833                 {
1834                         Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get entries (%d...%d)! Reason: %s",
1835                                 index, endIndex, client.GetServerError().Data()));
1836         
1837                         if (result)
1838                                 delete result;
1839                                 
1840                         return 0;
1841                 }
1842                 
1843                 AliInfo(Form("Retrieved entries %d..%d (total %d); E.g. %s has %d values collected",
1844                                         index, endIndex, totalEntries, entries->At(index)->GetName(), ((TObjArray*)
1845                                         partialResult->GetValue(entries->At(index)->GetName()))->GetEntriesFast()));
1846                 
1847                 if (!result)
1848                 {
1849                         result = partialResult;
1850                 }
1851                 else
1852                 {               
1853                         TIter iter(partialResult);
1854                         TObjString* key = 0;
1855                         while ((key = (TObjString*) iter.Next()))
1856                                 result->Add(key, partialResult->GetValue(key->String()));
1857                                 
1858                         partialResult->SetOwner(kFALSE);
1859                         delete partialResult;
1860                 }
1861         
1862         }
1863
1864         return result;
1865 }
1866 //______________________________________________________________________________________________
1867 const char* AliShuttle::GetFile(Int_t system, const char* detector,
1868                 const char* id, const char* source)
1869 {
1870         // Get calibration file from file exchange servers
1871         // First queris the FXS database for the file name, using the run, detector, id and source info
1872         // then calls RetrieveFile(filename) for actual copy to local disk
1873         // run: current run being processed (given by Logbook entry fLogbookEntry)
1874         // detector: the Preprocessor name
1875         // id: provided as a parameter by the Preprocessor
1876         // source: provided by the Preprocessor through GetFileSources function
1877
1878         // check if test mode should simulate a FXS error
1879         if (fTestMode & kErrorFXSFiles)
1880         {
1881                 Log(detector, Form("GetFile - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
1882                 return 0;
1883         }
1884         
1885         // check connection, in case connect
1886         if (!Connect(system))
1887         {
1888                 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
1889                 return 0;
1890         }
1891
1892         // Query preparation
1893         TString sourceName(source);
1894         Int_t nFields = 3;
1895         TString sqlQueryStart = Form("select filePath,size,fileChecksum from %s where",
1896                                                                 fConfig->GetFXSdbTable(system));
1897         TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1898                                                                 GetCurrentRun(), detector, id);
1899
1900         if (system == kDAQ)
1901         {
1902                 whereClause += Form(" and DAQsource=\"%s\"", source);
1903         }
1904         else if (system == kDCS)
1905         {
1906                 sourceName="none";
1907         }
1908         else if (system == kHLT)
1909         {
1910                 whereClause += Form(" and DDLnumbers=\"%s\"", source);
1911                 nFields = 3;
1912         }
1913
1914         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1915
1916         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1917
1918         // Query execution
1919         TSQLResult* aResult = 0;
1920         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
1921         if (!aResult) {
1922                 Log(detector, Form("GetFileName - Can't execute SQL query to %s database for: id = %s, source = %s",
1923                                 GetSystemName(system), id, sourceName.Data()));
1924                 return 0;
1925         }
1926
1927         if(aResult->GetRowCount() == 0)
1928         {
1929                 Log(detector,
1930                         Form("GetFileName - No entry in %s FXS db for: id = %s, source = %s",
1931                                 GetSystemName(system), id, sourceName.Data()));
1932                 delete aResult;
1933                 return 0;
1934         }
1935
1936         if (aResult->GetRowCount() > 1) {
1937                 Log(detector,
1938                         Form("GetFileName - More than one entry in %s FXS db for: id = %s, source = %s",
1939                                 GetSystemName(system), id, sourceName.Data()));
1940                 delete aResult;
1941                 return 0;
1942         }
1943
1944         if (aResult->GetFieldCount() != nFields) {
1945                 Log(detector,
1946                         Form("GetFileName - Wrong field count in %s FXS db for: id = %s, source = %s",
1947                                 GetSystemName(system), id, sourceName.Data()));
1948                 delete aResult;
1949                 return 0;
1950         }
1951
1952         TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
1953
1954         if (!aRow){
1955                 Log(detector, Form("GetFileName - Empty set result in %s FXS db from query: id = %s, source = %s",
1956                                 GetSystemName(system), id, sourceName.Data()));
1957                 delete aResult;
1958                 return 0;
1959         }
1960
1961         TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
1962         TString fileSize(aRow->GetField(1), aRow->GetFieldLength(1));
1963         TString fileChecksum(aRow->GetField(2), aRow->GetFieldLength(2));
1964
1965         delete aResult;
1966         delete aRow;
1967
1968         AliDebug(2, Form("filePath = %s; size = %s, fileChecksum = %s",
1969                                 filePath.Data(), fileSize.Data(), fileChecksum.Data()));
1970
1971         // retrieved file is renamed to make it unique
1972         TString localFileName = Form("%s_%s_%d_%s_%s.shuttle",
1973                                         GetSystemName(system), detector, GetCurrentRun(), id, sourceName.Data());
1974
1975
1976         // file retrieval from FXS
1977         UInt_t nRetries = 0;
1978         UInt_t maxRetries = 3;
1979         Bool_t result = kFALSE;
1980
1981         // copy!! if successful TSystem::Exec returns 0
1982         while(nRetries++ < maxRetries) {
1983                 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
1984                 result = RetrieveFile(system, filePath.Data(), localFileName.Data());
1985                 if(!result)
1986                 {
1987                         Log(detector, Form("GetFileName - Copy of file %s from %s FXS failed",
1988                                         filePath.Data(), GetSystemName(system)));
1989                         continue;
1990                 } else {
1991                         AliInfo(Form("File %s copied from %s FXS into %s/%s",
1992                                                 filePath.Data(), GetSystemName(system),
1993                                                 GetShuttleTempDir(), localFileName.Data()));
1994                 }
1995
1996                 if (fileChecksum.Length()>0)
1997                 {
1998                         // compare md5sum of local file with the one stored in the FXS DB
1999                         Int_t md5Comp = gSystem->Exec(Form("md5sum %s/%s |grep %s 2>&1 > /dev/null",
2000                                                 GetShuttleTempDir(), localFileName.Data(), fileChecksum.Data()));
2001
2002                         if (md5Comp != 0)
2003                         {
2004                                 Log(detector, Form("GetFileName - md5sum of file %s does not match with local copy!",
2005                                                         filePath.Data()));
2006                                 result = kFALSE;
2007                                 continue;
2008                         }
2009                 } else {
2010                         Log(fCurrentDetector, Form("GetFile - md5sum of file %s not set in %s database, skipping comparison",
2011                                                         filePath.Data(), GetSystemName(system)));
2012                 }
2013                 if (result) break;
2014         }
2015
2016         if(!result) return 0;
2017
2018         fFXSCalled[system]=kTRUE;
2019         TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, sourceName.Data()));
2020         fFXSlist[system].Add(fileParams);
2021
2022         static TString fullLocalFileName;
2023         fullLocalFileName.Form("%s/%s", GetShuttleTempDir(), localFileName.Data());
2024
2025         Log(fCurrentDetector, Form("GetFile - Retrieved file with id %s and source %s from %s to %s", id, source, GetSystemName(system), fullLocalFileName.Data()));
2026
2027         return fullLocalFileName.Data();
2028 }
2029
2030 //______________________________________________________________________________________________
2031 Bool_t AliShuttle::RetrieveFile(UInt_t system, const char* fxsFileName, const char* localFileName)
2032 {
2033         //
2034         // Copies file from FXS to local Shuttle machine
2035         //
2036
2037         // check temp directory: trying to cd to temp; if it does not exist, create it
2038         AliDebug(2, Form("Copy file %s from %s FXS into %s/%s",
2039                         GetSystemName(system), fxsFileName, GetShuttleTempDir(), localFileName));
2040
2041         void* dir = gSystem->OpenDirectory(GetShuttleTempDir());
2042         if (dir == NULL) {
2043                 if (gSystem->mkdir(GetShuttleTempDir(), kTRUE)) {
2044                         AliError(Form("Can't open directory <%s>", GetShuttleTempDir()));
2045                         return kFALSE;
2046                 }
2047
2048         } else {
2049                 gSystem->FreeDirectory(dir);
2050         }
2051
2052         TString baseFXSFolder;
2053         if (system == kDAQ)
2054         {
2055                 baseFXSFolder = "FES/";
2056         }
2057         else if (system == kDCS)
2058         {
2059                 baseFXSFolder = "";
2060         }
2061         else if (system == kHLT)
2062         {
2063                 baseFXSFolder = "/opt/FXS";
2064         }
2065
2066
2067         TString command = Form("scp -oPort=%d -2 %s@%s:%s%s %s/%s",
2068                 fConfig->GetFXSPort(system),
2069                 fConfig->GetFXSUser(system),
2070                 fConfig->GetFXSHost(system),
2071                 baseFXSFolder.Data(),
2072                 fxsFileName,
2073                 GetShuttleTempDir(),
2074                 localFileName);
2075
2076         AliDebug(2, Form("%s",command.Data()));
2077
2078         Bool_t result = (gSystem->Exec(command.Data()) == 0);
2079
2080         return result;
2081 }
2082
2083 //______________________________________________________________________________________________
2084 TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
2085 {
2086         //
2087         // Get sources producing the condition file Id from file exchange servers
2088         // if id is NULL all sources are returned (distinct)
2089         //
2090
2091         Log(detector, Form("GetFileSources - Retrieving sources with id %s from %s", id, GetSystemName(system)));
2092         
2093         // check if test mode should simulate a FXS error
2094         if (fTestMode & kErrorFXSSources)
2095         {
2096                 Log(detector, Form("GetFileSources - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2097                 return 0;
2098         }
2099
2100         if (system == kDCS)
2101         {
2102                 AliWarning("DCS system has only one source of data!");
2103                 TList *list = new TList();
2104                 list->SetOwner(1);
2105                 list->Add(new TObjString(" "));
2106                 return list;
2107         }
2108
2109         // check connection, in case connect
2110         if (!Connect(system))
2111         {
2112                 Log(detector, Form("GetFileSources - Couldn't connect to %s FXS database", GetSystemName(system)));
2113                 return NULL;
2114         }
2115
2116         TString sourceName = 0;
2117         if (system == kDAQ)
2118         {
2119                 sourceName = "DAQsource";
2120         } else if (system == kHLT)
2121         {
2122                 sourceName = "DDLnumbers";
2123         }
2124
2125         TString sqlQueryStart = Form("select distinct %s from %s where", sourceName.Data(), fConfig->GetFXSdbTable(system));
2126         TString whereClause = Form("run=%d and detector=\"%s\"",
2127                                 GetCurrentRun(), detector);
2128         if (id)
2129                 whereClause += Form(" and fileId=\"%s\"", id);
2130         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2131
2132         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2133
2134         // Query execution
2135         TSQLResult* aResult;
2136         aResult = fServer[system]->Query(sqlQuery);
2137         if (!aResult) {
2138                 Log(detector, Form("GetFileSources - Can't execute SQL query to %s database for id: %s",
2139                                 GetSystemName(system), id));
2140                 return 0;
2141         }
2142
2143         TList *list = new TList();
2144         list->SetOwner(1);
2145         
2146         if (aResult->GetRowCount() == 0)
2147         {
2148                 Log(detector,
2149                         Form("GetFileSources - No entry in %s FXS table for id: %s", GetSystemName(system), id));
2150                 delete aResult;
2151                 return list;
2152         }
2153
2154         Log(detector, Form("GetFileSources - Found %d sources", aResult->GetRowCount()));
2155
2156         TSQLRow* aRow;
2157         while ((aRow = aResult->Next()))
2158         {
2159
2160                 TString source(aRow->GetField(0), aRow->GetFieldLength(0));
2161                 AliDebug(2, Form("%s = %s", sourceName.Data(), source.Data()));
2162                 list->Add(new TObjString(source));
2163                 delete aRow;
2164         }
2165
2166         delete aResult;
2167
2168         return list;
2169 }
2170
2171 //______________________________________________________________________________________________
2172 TList* AliShuttle::GetFileIDs(Int_t system, const char* detector, const char* source)
2173 {
2174         //
2175         // Get all ids of condition files produced by a given source from file exchange servers
2176         //
2177         
2178         Log(detector, Form("GetFileIDs - Retrieving ids with source %s with %s", source, GetSystemName(system)));
2179
2180         // check if test mode should simulate a FXS error
2181         if (fTestMode & kErrorFXSSources)
2182         {
2183                 Log(detector, Form("GetFileIDs - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2184                 return 0;
2185         }
2186
2187         // check connection, in case connect
2188         if (!Connect(system))
2189         {
2190                 Log(detector, Form("GetFileIDs - Couldn't connect to %s FXS database", GetSystemName(system)));
2191                 return NULL;
2192         }
2193
2194         TString sourceName = 0;
2195         if (system == kDAQ)
2196         {
2197                 sourceName = "DAQsource";
2198         } else if (system == kHLT)
2199         {
2200                 sourceName = "DDLnumbers";
2201         }
2202
2203         TString sqlQueryStart = Form("select fileId from %s where", fConfig->GetFXSdbTable(system));
2204         TString whereClause = Form("run=%d and detector=\"%s\"",
2205                                 GetCurrentRun(), detector);
2206         if (sourceName.Length() > 0 && source)
2207                 whereClause += Form(" and %s=\"%s\"", sourceName.Data(), source);
2208         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2209
2210         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2211
2212         // Query execution
2213         TSQLResult* aResult;
2214         aResult = fServer[system]->Query(sqlQuery);
2215         if (!aResult) {
2216                 Log(detector, Form("GetFileIDs - Can't execute SQL query to %s database for source: %s",
2217                                 GetSystemName(system), source));
2218                 return 0;
2219         }
2220
2221         TList *list = new TList();
2222         list->SetOwner(1);
2223         
2224         if (aResult->GetRowCount() == 0)
2225         {
2226                 Log(detector,
2227                         Form("GetFileIDs - No entry in %s FXS table for source: %s", GetSystemName(system), source));
2228                 delete aResult;
2229                 return list;
2230         }
2231
2232         Log(detector, Form("GetFileIDs - Found %d ids", aResult->GetRowCount()));
2233
2234         TSQLRow* aRow;
2235
2236         while ((aRow = aResult->Next()))
2237         {
2238
2239                 TString id(aRow->GetField(0), aRow->GetFieldLength(0));
2240                 AliDebug(2, Form("fileId = %s", id.Data()));
2241                 list->Add(new TObjString(id));
2242                 delete aRow;
2243         }
2244
2245         delete aResult;
2246
2247         return list;
2248 }
2249
2250 //______________________________________________________________________________________________
2251 Bool_t AliShuttle::Connect(Int_t system)
2252 {
2253         // Connect to MySQL Server of the system's FXS MySQL databases
2254         // DAQ Logbook, Shuttle Logbook and DAQ FXS db are on the same host
2255         //
2256
2257         // check connection: if already connected return
2258         if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
2259
2260         TString dbHost, dbUser, dbPass, dbName;
2261
2262         if (system < 3) // FXS db servers
2263         {
2264                 dbHost = Form("mysql://%s:%d", fConfig->GetFXSdbHost(system), fConfig->GetFXSdbPort(system));
2265                 dbUser = fConfig->GetFXSdbUser(system);
2266                 dbPass = fConfig->GetFXSdbPass(system);
2267                 dbName =   fConfig->GetFXSdbName(system);
2268         } else { // Run & Shuttle logbook servers
2269         // TODO Will the Shuttle logbook server be the same as the Run logbook server ???
2270                 dbHost = Form("mysql://%s:%d", fConfig->GetDAQlbHost(), fConfig->GetDAQlbPort());
2271                 dbUser = fConfig->GetDAQlbUser();
2272                 dbPass = fConfig->GetDAQlbPass();
2273                 dbName =   fConfig->GetDAQlbDB();
2274         }
2275
2276         fServer[system] = TSQLServer::Connect(dbHost.Data(), dbUser.Data(), dbPass.Data());
2277         if (!fServer[system] || !fServer[system]->IsConnected()) {
2278                 if(system < 3)
2279                 {
2280                 AliError(Form("Can't establish connection to FXS database for %s",
2281                                         AliShuttleInterface::GetSystemName(system)));
2282                 } else {
2283                 AliError("Can't establish connection to Run logbook.");
2284                 }
2285                 if(fServer[system]) delete fServer[system];
2286                 return kFALSE;
2287         }
2288
2289         // Get tables
2290         TSQLResult* aResult=0;
2291         switch(system){
2292                 case kDAQ:
2293                         aResult = fServer[kDAQ]->GetTables(dbName.Data());
2294                         break;
2295                 case kDCS:
2296                         aResult = fServer[kDCS]->GetTables(dbName.Data());
2297                         break;
2298                 case kHLT:
2299                         aResult = fServer[kHLT]->GetTables(dbName.Data());
2300                         break;
2301                 default:
2302                         aResult = fServer[3]->GetTables(dbName.Data());
2303                         break;
2304         }
2305
2306         delete aResult;
2307         return kTRUE;
2308 }
2309
2310 //______________________________________________________________________________________________
2311 Bool_t AliShuttle::UpdateTable()
2312 {
2313         //
2314         // Update FXS table filling time_processed field in all rows corresponding to current run and detector
2315         //
2316
2317         Bool_t result = kTRUE;
2318
2319         for (UInt_t system=0; system<3; system++)
2320         {
2321                 if(!fFXSCalled[system]) continue;
2322
2323                 // check connection, in case connect
2324                 if (!Connect(system))
2325                 {
2326                         Log(fCurrentDetector, Form("UpdateTable - Couldn't connect to %s FXS database", GetSystemName(system)));
2327                         result = kFALSE;
2328                         continue;
2329                 }
2330
2331                 TTimeStamp now; // now
2332
2333                 // Loop on FXS list entries
2334                 TIter iter(&fFXSlist[system]);
2335                 TObjString *aFXSentry=0;
2336                 while ((aFXSentry = dynamic_cast<TObjString*> (iter.Next())))
2337                 {
2338                         TString aFXSentrystr = aFXSentry->String();
2339                         TObjArray *aFXSarray = aFXSentrystr.Tokenize("#!?!#");
2340                         if (!aFXSarray || aFXSarray->GetEntries() != 2 )
2341                         {
2342                                 Log(fCurrentDetector, Form("UpdateTable - error updating %s FXS entry. Check string: <%s>",
2343                                         GetSystemName(system), aFXSentrystr.Data()));
2344                                 if(aFXSarray) delete aFXSarray;
2345                                 result = kFALSE;
2346                                 continue;
2347                         }
2348                         const char* fileId = ((TObjString*) aFXSarray->At(0))->GetName();
2349                         const char* source = ((TObjString*) aFXSarray->At(1))->GetName();
2350
2351                         TString whereClause;
2352                         if (system == kDAQ)
2353                         {
2354                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
2355                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
2356                         }
2357                         else if (system == kDCS)
2358                         {
2359                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\";",
2360                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId);
2361                         }
2362                         else if (system == kHLT)
2363                         {
2364                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\";",
2365                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
2366                         }
2367
2368                         delete aFXSarray;
2369
2370                         TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
2371                                                                 now.GetSec(), whereClause.Data());
2372
2373                         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2374
2375                         // Query execution
2376                         TSQLResult* aResult;
2377                         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2378                         if (!aResult)
2379                         {
2380                                 Log(fCurrentDetector, Form("UpdateTable - %s db: can't execute SQL query <%s>",
2381                                                                 GetSystemName(system), sqlQuery.Data()));
2382                                 result = kFALSE;
2383                                 continue;
2384                         }
2385                         delete aResult;
2386                 }
2387         }
2388
2389         return result;
2390 }
2391
2392 //______________________________________________________________________________________________
2393 Bool_t AliShuttle::UpdateTableFailCase()
2394 {
2395         // Update FXS table filling time_processed field in all rows corresponding to current run and detector
2396         // this is called in case the preprocessor is declared failed for the current run, because
2397         // the fields are updated only in case of success
2398
2399         Bool_t result = kTRUE;
2400
2401         for (UInt_t system=0; system<3; system++)
2402         {
2403                 // check connection, in case connect
2404                 if (!Connect(system))
2405                 {
2406                         Log(fCurrentDetector, Form("UpdateTableFailCase - Couldn't connect to %s FXS database",
2407                                                         GetSystemName(system)));
2408                         result = kFALSE;
2409                         continue;
2410                 }
2411
2412                 TTimeStamp now; // now
2413
2414                 // Loop on FXS list entries
2415
2416                 TString whereClause = Form("where run=%d and detector=\"%s\";",
2417                                                 GetCurrentRun(), fCurrentDetector.Data());
2418
2419
2420                 TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
2421                                                         now.GetSec(), whereClause.Data());
2422
2423                 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2424
2425                 // Query execution
2426                 TSQLResult* aResult;
2427                 aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2428                 if (!aResult)
2429                 {
2430                         Log(fCurrentDetector, Form("UpdateTableFailCase - %s db: can't execute SQL query <%s>",
2431                                                         GetSystemName(system), sqlQuery.Data()));
2432                         result = kFALSE;
2433                         continue;
2434                 }
2435                 delete aResult;
2436         }
2437
2438         return result;
2439 }
2440
2441 //______________________________________________________________________________________________
2442 Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
2443 {
2444         //
2445         // Update Shuttle logbook filling detector or shuttle_done column
2446         // ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
2447         //
2448
2449         // check connection, in case connect
2450         if(!Connect(3)){
2451                 Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook.");
2452                 return kFALSE;
2453         }
2454
2455         TString detName(detector);
2456         TString setClause;
2457         if(detName == "shuttle_done")
2458         {
2459                 setClause = "set shuttle_done=1";
2460
2461                 // Send the information to ML
2462                 TMonaLisaText  mlStatus("SHUTTLE_status", "Done");
2463
2464                 TList mlList;
2465                 mlList.Add(&mlStatus);
2466
2467                 fMonaLisa->SendParameters(&mlList);
2468         } else {
2469                 TString statusStr(status);
2470                 if(statusStr.Contains("done", TString::kIgnoreCase) ||
2471                    statusStr.Contains("failed", TString::kIgnoreCase)){
2472                         setClause = Form("set %s=\"%s\"", detector, status);
2473                 } else {
2474                         Log("SHUTTLE",
2475                                 Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s",
2476                                         status, detector));
2477                         return kFALSE;
2478                 }
2479         }
2480
2481         TString whereClause = Form("where run=%d", GetCurrentRun());
2482
2483         TString sqlQuery = Form("update %s %s %s",
2484                                         fConfig->GetShuttlelbTable(), setClause.Data(), whereClause.Data());
2485
2486         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2487
2488         // Query execution
2489         TSQLResult* aResult;
2490         aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
2491         if (!aResult) {
2492                 Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data()));
2493                 return kFALSE;
2494         }
2495         delete aResult;
2496
2497         return kTRUE;
2498 }
2499
2500 //______________________________________________________________________________________________
2501 Int_t AliShuttle::GetCurrentRun() const
2502 {
2503         //
2504         // Get current run from logbook entry
2505         //
2506
2507         return fLogbookEntry ? fLogbookEntry->GetRun() : -1;
2508 }
2509
2510 //______________________________________________________________________________________________
2511 UInt_t AliShuttle::GetCurrentStartTime() const
2512 {
2513         //
2514         // get current start time
2515         //
2516
2517         return fLogbookEntry ? fLogbookEntry->GetStartTime() : 0;
2518 }
2519
2520 //______________________________________________________________________________________________
2521 UInt_t AliShuttle::GetCurrentEndTime() const
2522 {
2523         //
2524         // get current end time from logbook entry
2525         //
2526
2527         return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
2528 }
2529
2530 //______________________________________________________________________________________________
2531 void AliShuttle::Log(const char* detector, const char* message)
2532 {
2533         //
2534         // Fill log string with a message
2535         //
2536
2537         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
2538         if (dir == NULL) {
2539                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE)) {
2540                         AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
2541                         return;
2542                 }
2543
2544         } else {
2545                 gSystem->FreeDirectory(dir);
2546         }
2547
2548         TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
2549         if (GetCurrentRun() >= 0) 
2550                 toLog += Form("run %d - ", GetCurrentRun());
2551         toLog += Form("%s", message);
2552
2553         AliInfo(toLog.Data());
2554         
2555         // if we redirect the log output already to the file, leave here
2556         if (fOutputRedirected && strcmp(detector, "SHUTTLE") != 0)
2557                 return;
2558
2559         TString fileName = GetLogFileName(detector);
2560         
2561         gSystem->ExpandPathName(fileName);
2562
2563         ofstream logFile;
2564         logFile.open(fileName, ofstream::out | ofstream::app);
2565
2566         if (!logFile.is_open()) {
2567                 AliError(Form("Could not open file %s", fileName.Data()));
2568                 return;
2569         }
2570
2571         logFile << toLog.Data() << "\n";
2572
2573         logFile.close();
2574 }
2575
2576 //______________________________________________________________________________________________
2577 TString AliShuttle::GetLogFileName(const char* detector) const
2578 {
2579         // 
2580         // returns the name of the log file for a given sub detector
2581         //
2582         
2583         TString fileName;
2584         
2585         if (GetCurrentRun() >= 0) 
2586                 fileName.Form("%s/%s_%d.log", GetShuttleLogDir(), detector, GetCurrentRun());
2587         else
2588                 fileName.Form("%s/%s.log", GetShuttleLogDir(), detector);
2589
2590         return fileName;
2591 }
2592
2593 //______________________________________________________________________________________________
2594 Bool_t AliShuttle::Collect(Int_t run)
2595 {
2596         //
2597         // Collects conditions data for all UNPROCESSED run written to DAQ LogBook in case of run = -1 (default)
2598         // If a dedicated run is given this run is processed
2599         //
2600         // In operational mode, this is the Shuttle function triggered by the EOR signal.
2601         //
2602
2603         if (run == -1)
2604                 Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs");
2605         else
2606                 Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run));
2607
2608         SetLastAction("Starting");
2609
2610         TString whereClause("where shuttle_done=0");
2611         if (run != -1)
2612                 whereClause += Form(" and run=%d", run);
2613
2614         TObjArray shuttleLogbookEntries;
2615         if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries))
2616         {
2617                 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
2618                 return kFALSE;
2619         }
2620
2621         if (shuttleLogbookEntries.GetEntries() == 0)
2622         {
2623                 if (run == -1)
2624                         Log("SHUTTLE","Collect - Found no UNPROCESSED runs in Shuttle logbook");
2625                 else
2626                         Log("SHUTTLE", Form("Collect - Run %d is already DONE "
2627                                                 "or it does not exist in Shuttle logbook", run));
2628                 return kTRUE;
2629         }
2630
2631         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
2632                 fFirstUnprocessed[iDet] = kTRUE;
2633
2634         if (run != -1)
2635         {
2636                 // query Shuttle logbook for earlier runs, check if some detectors are unprocessed,
2637                 // flag them into fFirstUnprocessed array
2638                 TString whereClause(Form("where shuttle_done=0 and run < %d", run));
2639                 TObjArray tmpLogbookEntries;
2640                 if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries))
2641                 {
2642                         Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
2643                         return kFALSE;
2644                 }
2645
2646                 TIter iter(&tmpLogbookEntries);
2647                 AliShuttleLogbookEntry* anEntry = 0;
2648                 while ((anEntry = dynamic_cast<AliShuttleLogbookEntry*> (iter.Next())))
2649                 {
2650                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
2651                         {
2652                                 if (anEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
2653                                 {
2654                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
2655                                                         anEntry->GetRun(), GetDetName(iDet)));
2656                                         fFirstUnprocessed[iDet] = kFALSE;
2657                                 }
2658                         }
2659
2660                 }
2661
2662         }
2663
2664         if (!RetrieveConditionsData(shuttleLogbookEntries))
2665         {
2666                 Log("SHUTTLE", "Collect - Process of at least one run failed");
2667                 return kFALSE;
2668         }
2669
2670         Log("SHUTTLE", "Collect - Requested run(s) successfully processed");
2671         return kTRUE;
2672 }
2673
2674 //______________________________________________________________________________________________
2675 Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries)
2676 {
2677         //
2678         // Retrieve conditions data for all runs that aren't processed yet
2679         //
2680
2681         Bool_t hasError = kFALSE;
2682
2683         TIter iter(&dateEntries);
2684         AliShuttleLogbookEntry* anEntry;
2685
2686         while ((anEntry = (AliShuttleLogbookEntry*) iter.Next())){
2687                 if (!Process(anEntry)){
2688                         hasError = kTRUE;
2689                 }
2690
2691                 // clean SHUTTLE temp directory
2692                 TString filename = Form("%s/*.shuttle", GetShuttleTempDir());
2693                 RemoveFile(filename.Data());
2694         }
2695
2696         return hasError == kFALSE;
2697 }
2698
2699 //______________________________________________________________________________________________
2700 ULong_t AliShuttle::GetTimeOfLastAction() const
2701 {
2702         //
2703         // Gets time of last action
2704         //
2705
2706         ULong_t tmp;
2707
2708         fMonitoringMutex->Lock();
2709
2710         tmp = fLastActionTime;
2711
2712         fMonitoringMutex->UnLock();
2713
2714         return tmp;
2715 }
2716
2717 //______________________________________________________________________________________________
2718 const TString AliShuttle::GetLastAction() const
2719 {
2720         //
2721         // returns a string description of the last action
2722         //
2723
2724         TString tmp;
2725
2726         fMonitoringMutex->Lock();
2727         
2728         tmp = fLastAction;
2729         
2730         fMonitoringMutex->UnLock();
2731
2732         return tmp;
2733 }
2734
2735 //______________________________________________________________________________________________
2736 void AliShuttle::SetLastAction(const char* action)
2737 {
2738         //
2739         // updates the monitoring variables
2740         //
2741
2742         fMonitoringMutex->Lock();
2743
2744         fLastAction = action;
2745         fLastActionTime = time(0);
2746         
2747         fMonitoringMutex->UnLock();
2748 }
2749
2750 //______________________________________________________________________________________________
2751 const char* AliShuttle::GetRunParameter(const char* param)
2752 {
2753         //
2754         // returns run parameter read from DAQ logbook
2755         //
2756
2757         if(!fLogbookEntry) {
2758                 AliError("No logbook entry!");
2759                 return 0;
2760         }
2761
2762         return fLogbookEntry->GetRunParameter(param);
2763 }
2764
2765 //______________________________________________________________________________________________
2766 AliCDBEntry* AliShuttle::GetFromOCDB(const char* detector, const AliCDBPath& path)
2767 {
2768         //
2769         // returns object from OCDB valid for current run
2770         //
2771
2772         if (fTestMode & kErrorOCDB)
2773         {
2774                 Log(detector, "GetFromOCDB - In TESTMODE - Simulating error with OCDB");
2775                 return 0;
2776         }
2777         
2778         AliCDBStorage *sto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
2779         if (!sto)
2780         {
2781                 Log(detector, "GetFromOCDB - Cannot activate main OCDB for query!");
2782                 return 0;
2783         }
2784
2785         return dynamic_cast<AliCDBEntry*> (sto->Get(path, GetCurrentRun()));
2786 }
2787
2788 //______________________________________________________________________________________________
2789 Bool_t AliShuttle::SendMail()
2790 {
2791         //
2792         // sends a mail to the subdetector expert in case of preprocessor error
2793         //
2794         
2795         if (fTestMode != kNone)
2796                 return kTRUE;
2797
2798         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
2799         if (dir == NULL)
2800         {
2801                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
2802                 {
2803                         AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
2804                         return kFALSE;
2805                 }
2806
2807         } else {
2808                 gSystem->FreeDirectory(dir);
2809         }
2810
2811         TString bodyFileName;
2812         bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
2813         gSystem->ExpandPathName(bodyFileName);
2814
2815         ofstream mailBody;
2816         mailBody.open(bodyFileName, ofstream::out);
2817
2818         if (!mailBody.is_open())
2819         {
2820                 AliError(Form("Could not open mail body file %s", bodyFileName.Data()));
2821                 return kFALSE;
2822         }
2823
2824         TString to="";
2825         TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
2826         TObjString *anExpert=0;
2827         while ((anExpert = (TObjString*) iterExperts.Next()))
2828         {
2829                 to += Form("%s,", anExpert->GetName());
2830         }
2831         to.Remove(to.Length()-1);
2832         AliDebug(2, Form("to: %s",to.Data()));
2833
2834         if (to.IsNull()) {
2835                 AliInfo("List of detector responsibles not yet set!");
2836                 return kFALSE;
2837         }
2838
2839         TString cc="alberto.colla@cern.ch";
2840
2841         TString subject = Form("%s Shuttle preprocessor FAILED in run %d !",
2842                                 fCurrentDetector.Data(), GetCurrentRun());
2843         AliDebug(2, Form("subject: %s", subject.Data()));
2844
2845         TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
2846         body += Form("SHUTTLE just detected that your preprocessor "
2847                         "failed processing run %d!!\n\n", GetCurrentRun());
2848         body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n", fCurrentDetector.Data());
2849         body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
2850         body += Form("Find the %s log for the current run on \n\n"
2851                 "\thttp://pcalishuttle01.cern.ch:8880/logs/%s_%d.log \n\n", 
2852                 fCurrentDetector.Data(), fCurrentDetector.Data(), GetCurrentRun());
2853         body += Form("The last 10 lines of %s log file are following:\n\n");
2854
2855         AliDebug(2, Form("Body begin: %s", body.Data()));
2856
2857         mailBody << body.Data();
2858         mailBody.close();
2859         mailBody.open(bodyFileName, ofstream::out | ofstream::app);
2860
2861         TString logFileName = Form("%s/%s_%d.log", GetShuttleLogDir(), fCurrentDetector.Data(), GetCurrentRun());
2862         TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
2863         if (gSystem->Exec(tailCommand.Data()))
2864         {
2865                 mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
2866         }
2867
2868         TString endBody = Form("------------------------------------------------------\n\n");
2869         endBody += Form("In case of problems please contact the SHUTTLE core team.\n\n");
2870         endBody += "Please do not answer this message directly, it is automatically generated.\n\n";
2871         endBody += "Greetings,\n\n \t\t\tthe SHUTTLE\n";
2872
2873         AliDebug(2, Form("Body end: %s", endBody.Data()));
2874
2875         mailBody << endBody.Data();
2876
2877         mailBody.close();
2878
2879         // send mail!
2880         TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
2881                                                 subject.Data(),
2882                                                 cc.Data(),
2883                                                 to.Data(),
2884                                                 bodyFileName.Data());
2885         AliDebug(2, Form("mail command: %s", mailCommand.Data()));
2886
2887         Bool_t result = gSystem->Exec(mailCommand.Data());
2888
2889         return result == 0;
2890 }
2891
2892 //______________________________________________________________________________________________
2893 const char* AliShuttle::GetRunType()
2894 {
2895         //
2896         // returns run type read from "run type" logbook
2897         //
2898
2899         if(!fLogbookEntry) {
2900                 AliError("No logbook entry!");
2901                 return 0;
2902         }
2903
2904         return fLogbookEntry->GetRunType();
2905 }
2906
2907 //______________________________________________________________________________________________
2908 void AliShuttle::SetShuttleTempDir(const char* tmpDir)
2909 {
2910         //
2911         // sets Shuttle temp directory
2912         //
2913
2914         fgkShuttleTempDir = gSystem->ExpandPathName(tmpDir);
2915 }
2916
2917 //______________________________________________________________________________________________
2918 void AliShuttle::SetShuttleLogDir(const char* logDir)
2919 {
2920         //
2921         // sets Shuttle log directory
2922         //
2923
2924         fgkShuttleLogDir = gSystem->ExpandPathName(logDir);
2925 }