]> git.uio.no Git - u/mrichter/AliRoot.git/blob - SHUTTLE/AliShuttle.cxx
1) Connect and Close are called before and after each query, so one can
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttle.cxx
1 /**************************************************************************
2  * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
3  *                                                                        *
4  * Author: The ALICE Off-line Project.                                    *
5  * Contributors are mentioned in the code where appropriate.              *
6  *                                                                        *
7  * Permission to use, copy, modify and distribute this software and its   *
8  * documentation strictly for non-commercial purposes is hereby granted   *
9  * without fee, provided that the above copyright notice appears in all   *
10  * copies and that both the copyright notice and this permission notice   *
11  * appear in the supporting documentation. The authors make no claims     *
12  * about the suitability of this software for any purpose. It is          *
13  * provided "as is" without express or implied warranty.                  *
14  **************************************************************************/
15
16 /*
17 $Log$
18 Revision 1.55  2007/08/06 12:26:40  acolla
19 Function Bool_t GetHLTStatus added to preprocessor. It returns the status of HLT
20 read from the run logbook.
21
22 Revision 1.54  2007/07/12 09:51:25  jgrosseo
23 removed duplicated log message in GetFile
24
25 Revision 1.53  2007/07/12 09:26:28  jgrosseo
26 updating hlt fxs base path
27
28 Revision 1.52  2007/07/12 08:06:45  jgrosseo
29 adding log messages in getfile... functions
30 adding not implemented copy constructor in alishuttleconfigholder
31
32 Revision 1.51  2007/07/03 17:24:52  acolla
33 root moved to v5-16-00. TFileMerger->Cp moved to TFile::Cp.
34
35 Revision 1.50  2007/07/02 17:19:32  acolla
36 preprocessor is run in a temp directory that is removed when process is finished.
37
38 Revision 1.49  2007/06/29 10:45:06  acolla
39 Number of columns in MySql Shuttle logbook increased by one (HLT added)
40
41 Revision 1.48  2007/06/21 13:06:19  acolla
42 GetFileSources returns dummy list with 1 source if system=DCS (better than
43 returning error as it was)
44
45 Revision 1.47  2007/06/19 17:28:56  acolla
46 HLT updated; missing map bug removed.
47
48 Revision 1.46  2007/06/09 13:01:09  jgrosseo
49 Switching to retrieval of several DCS DPs at a time (multiDPrequest)
50
51 Revision 1.45  2007/05/30 06:35:20  jgrosseo
52 Adding functionality to the Shuttle/TestShuttle:
53 o) Function to retrieve list of sources from a given system (GetFileSources with id=0)
54 o) Function to retrieve list of IDs for a given source      (GetFileIDs)
55 These functions are needed for dealing with the tag files that are saved for the GRP preprocessor
56 Example code has been added to the TestProcessor in TestShuttle
57
58 Revision 1.44  2007/05/11 16:09:32  acolla
59 Reference files for ITS, MUON and PHOS are now stored in OfflineDetName/OnlineDetName/run_...
60 example: ITS/SPD/100_filename.root
61
62 Revision 1.43  2007/05/10 09:59:51  acolla
63 Various bug fixes in StoreRefFilesToGrid; Cleaning of reference storage before processing detector (CleanReferenceStorage)
64
65 Revision 1.42  2007/05/03 08:01:39  jgrosseo
66 typo in last commit :-(
67
68 Revision 1.41  2007/05/03 08:00:48  jgrosseo
69 fixing log message when pp want to skip dcs value retrieval
70
71 Revision 1.40  2007/04/27 07:06:48  jgrosseo
72 GetFileSources returns empty list in case of no files, but successful query
73 No mails sent in testmode
74
75 Revision 1.39  2007/04/17 12:43:57  acolla
76 Correction in StoreOCDB; change of text in mail to detector expert
77
78 Revision 1.38  2007/04/12 08:26:18  jgrosseo
79 updated comment
80
81 Revision 1.37  2007/04/10 16:53:14  jgrosseo
82 redirecting sub detector stdout, stderr to sub detector log file
83
84 Revision 1.35  2007/04/04 16:26:38  acolla
85 1. Re-organization of function calls in TestPreprocessor to make it more meaningful.
86 2. Added missing dependency in test preprocessors.
87 3. in AliShuttle.cxx: processing time and memory consumption info on a single line.
88
89 Revision 1.34  2007/04/04 10:33:36  jgrosseo
90 1) Storing of files to the Grid is now done _after_ your preprocessors succeeded. This is transparent, which means that you can still use the same functions (Store, StoreReferenceData) to store files to the Grid. However, the Shuttle first stores them locally and transfers them after the preprocessor finished. The return code of these two functions has changed from UInt_t to Bool_t which gives you the success of the storing.
91 In case of an error with the Grid, the Shuttle will retry the storing later, the preprocessor does not need to be run again.
92
93 2) The meaning of the return code of the preprocessor has changed. 0 is now success and any other value means failure. This value is stored in the log and you can use it to keep details about the error condition.
94
95 3) New function StoreReferenceFile to _directly_ store a file (without opening it) to the reference storage.
96
97 4) The memory usage of the preprocessor is monitored. If it exceeds 2 GB it is terminated.
98
99 5) New function AliPreprocessor::ProcessDCS(). If you do not need to have DCS data in all cases, you can skip the processing by implemting this function and returning kFALSE under certain conditions. E.g. if there is a certain run type.
100 If you always need DCS data (like before), you do not need to implement it.
101
102 6) The run type has been added to the monitoring page
103
104 Revision 1.33  2007/04/03 13:56:01  acolla
105 Grid Storage at the end of preprocessing. Added virtual method to disable DCS query according to the
106 run type.
107
108 Revision 1.32  2007/02/28 10:41:56  acolla
109 Run type field added in SHUTTLE framework. Run type is read from "run type" logbook and retrieved by
110 AliPreprocessor::GetRunType() function.
111 Added some ldap definition files.
112
113 Revision 1.30  2007/02/13 11:23:21  acolla
114 Moved getters and setters of Shuttle's main OCDB/Reference, local
115 OCDB/Reference, temp and log folders to AliShuttleInterface
116
117 Revision 1.27  2007/01/30 17:52:42  jgrosseo
118 adding monalisa monitoring
119
120 Revision 1.26  2007/01/23 19:20:03  acolla
121 Removed old ldif files, added TOF, MCH ldif files. Added some options in
122 AliShuttleConfig::Print. Added in Ali Shuttle: SetShuttleTempDir and
123 SetShuttleLogDir
124
125 Revision 1.25  2007/01/15 19:13:52  acolla
126 Moved some AliInfo to AliDebug in SendMail function
127
128 Revision 1.21  2006/12/07 08:51:26  jgrosseo
129 update (alberto):
130 table, db names in ldap configuration
131 added GRP preprocessor
132 DCS data can also be retrieved by data point
133
134 Revision 1.20  2006/11/16 16:16:48  jgrosseo
135 introducing strict run ordering flag
136 removed giving preprocessor name to preprocessor, they have to know their name themselves ;-)
137
138 Revision 1.19  2006/11/06 14:23:04  jgrosseo
139 major update (Alberto)
140 o) reading of run parameters from the logbook
141 o) online offline naming conversion
142 o) standalone DCSclient package
143
144 Revision 1.18  2006/10/20 15:22:59  jgrosseo
145 o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
146 o) Merging Collect, CollectAll, CollectNew function
147 o) Removing implementation of empty copy constructors (declaration still there!)
148
149 Revision 1.17  2006/10/05 16:20:55  jgrosseo
150 adapting to new CDB classes
151
152 Revision 1.16  2006/10/05 15:46:26  jgrosseo
153 applying to the new interface
154
155 Revision 1.15  2006/10/02 16:38:39  jgrosseo
156 update (alberto):
157 fixed memory leaks
158 storing of objects that failed to be stored to the grid before
159 interfacing of shuttle status table in daq system
160
161 Revision 1.14  2006/08/29 09:16:05  jgrosseo
162 small update
163
164 Revision 1.13  2006/08/15 10:50:00  jgrosseo
165 effc++ corrections (alberto)
166
167 Revision 1.12  2006/08/08 14:19:29  jgrosseo
168 Update to shuttle classes (Alberto)
169
170 - Possibility to set the full object's path in the Preprocessor's and
171 Shuttle's  Store functions
172 - Possibility to extend the object's run validity in the same classes
173 ("startValidity" and "validityInfinite" parameters)
174 - Implementation of the StoreReferenceData function to store reference
175 data in a dedicated CDB storage.
176
177 Revision 1.11  2006/07/21 07:37:20  jgrosseo
178 last run is stored after each run
179
180 Revision 1.10  2006/07/20 09:54:40  jgrosseo
181 introducing status management: The processing per subdetector is divided into several steps,
182 after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
183 can keep track of the number of failures and skips further processing after a certain threshold is
184 exceeded. These thresholds can be configured in LDAP.
185
186 Revision 1.9  2006/07/19 10:09:55  jgrosseo
187 new configuration, accesst to DAQ FES (Alberto)
188
189 Revision 1.8  2006/07/11 12:44:36  jgrosseo
190 adding parameters for extended validity range of data produced by preprocessor
191
192 Revision 1.7  2006/07/10 14:37:09  jgrosseo
193 small fix + todo comment
194
195 Revision 1.6  2006/07/10 13:01:41  jgrosseo
196 enhanced storing of last sucessfully processed run (alberto)
197
198 Revision 1.5  2006/07/04 14:59:57  jgrosseo
199 revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
200
201 Revision 1.4  2006/06/12 09:11:16  jgrosseo
202 coding conventions (Alberto)
203
204 Revision 1.3  2006/06/06 14:26:40  jgrosseo
205 o) removed files that were moved to STEER
206 o) shuttle updated to follow the new interface (Alberto)
207
208 Revision 1.2  2006/03/07 07:52:34  hristov
209 New version (B.Yordanov)
210
211 Revision 1.6  2005/11/19 17:19:14  byordano
212 RetrieveDATEEntries and RetrieveConditionsData added
213
214 Revision 1.5  2005/11/19 11:09:27  byordano
215 AliShuttle declaration added
216
217 Revision 1.4  2005/11/17 17:47:34  byordano
218 TList changed to TObjArray
219
220 Revision 1.3  2005/11/17 14:43:23  byordano
221 import to local CVS
222
223 Revision 1.1.1.1  2005/10/28 07:33:58  hristov
224 Initial import as subdirectory in AliRoot
225
226 Revision 1.2  2005/09/13 08:41:15  byordano
227 default startTime endTime added
228
229 Revision 1.4  2005/08/30 09:13:02  byordano
230 some docs added
231
232 Revision 1.3  2005/08/29 21:15:47  byordano
233 some docs added
234
235 */
236
//
// This class is the main manager for AliShuttle.
// It organizes the data retrieval from DCS and calls the
// interface methods of AliPreprocessor.
// For every detector in AliShuttleConfig (see AliShuttleConfig),
// data for its set of aliases is retrieved. If there is a registered
// AliPreprocessor for this detector then it will be used
// according to the schema (see AliPreprocessor).
// If there isn't a registered AliPreprocessor then the retrieved
// data is stored automatically to the underlying AliCDBStorage.
// The alias name is used as detSpec.
//
249
250 #include "AliShuttle.h"
251
252 #include "AliCDBManager.h"
253 #include "AliCDBStorage.h"
254 #include "AliCDBId.h"
255 #include "AliCDBRunRange.h"
256 #include "AliCDBPath.h"
257 #include "AliCDBEntry.h"
258 #include "AliShuttleConfig.h"
259 #include "DCSClient/AliDCSClient.h"
260 #include "AliLog.h"
261 #include "AliPreprocessor.h"
262 #include "AliShuttleStatus.h"
263 #include "AliShuttleLogbookEntry.h"
264
265 #include <TSystem.h>
266 #include <TObject.h>
267 #include <TString.h>
268 #include <TTimeStamp.h>
269 #include <TObjString.h>
270 #include <TSQLServer.h>
271 #include <TSQLResult.h>
272 #include <TSQLRow.h>
273 #include <TMutex.h>
274 #include <TSystemDirectory.h>
275 #include <TSystemFile.h>
276 #include <TFile.h>
277 #include <TFileMerger.h>
278 #include <TGrid.h>
279 #include <TGridResult.h>
280
281 #include <TMonaLisaWriter.h>
282
283 #include <fstream>
284
285 #include <sys/types.h>
286 #include <sys/wait.h>
287
288 ClassImp(AliShuttle)
289
290 //______________________________________________________________________________________________
291 AliShuttle::AliShuttle(const AliShuttleConfig* config,
292                 UInt_t timeout, Int_t retries):
293 fConfig(config),
294 fTimeout(timeout), fRetries(retries),
295 fPreprocessorMap(),
296 fLogbookEntry(0),
297 fCurrentDetector(),
298 fStatusEntry(0),
299 fMonitoringMutex(0),
300 fLastActionTime(0),
301 fLastAction(),
302 fMonaLisa(0),
303 fTestMode(kNone),
304 fReadTestMode(kFALSE),
305 fOutputRedirected(kFALSE)
306 {
307         //
308         // config: AliShuttleConfig used
309         // timeout: timeout used for AliDCSClient connection
310         // retries: the number of retries in case of connection error.
311         //
312
313         if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
314         for(int iSys=0;iSys<4;iSys++) {
315                 fServer[iSys]=0;
316                 if (iSys < 3)
317                         fFXSlist[iSys].SetOwner(kTRUE);
318         }
319         fPreprocessorMap.SetOwner(kTRUE);
320
321         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
322                 fFirstUnprocessed[iDet] = kFALSE;
323
324         fMonitoringMutex = new TMutex();
325 }
326
327 //______________________________________________________________________________________________
328 AliShuttle::~AliShuttle()
329 {
330         //
331         // destructor
332         //
333
334         fPreprocessorMap.DeleteAll();
335         for(int iSys=0;iSys<4;iSys++)
336                 if(fServer[iSys]) {
337                         fServer[iSys]->Close();
338                         delete fServer[iSys];
339                         fServer[iSys] = 0;
340                 }
341
342         if (fStatusEntry){
343                 delete fStatusEntry;
344                 fStatusEntry = 0;
345         }
346         
347         if (fMonitoringMutex) 
348         {
349                 delete fMonitoringMutex;
350                 fMonitoringMutex = 0;
351         }
352 }
353
354 //______________________________________________________________________________________________
355 void AliShuttle::RegisterPreprocessor(AliPreprocessor* preprocessor)
356 {
357         //
358         // Registers new AliPreprocessor.
359         // It uses GetName() for indentificator of the pre processor.
360         // The pre processor is registered it there isn't any other
361         // with the same identificator (GetName()).
362         //
363
364         const char* detName = preprocessor->GetName();
365         if(GetDetPos(detName) < 0)
366                 AliFatal(Form("********** !!!!! Invalid detector name: %s !!!!! **********", detName));
367
368         if (fPreprocessorMap.GetValue(detName)) {
369                 AliWarning(Form("AliPreprocessor %s is already registered!", detName));
370                 return;
371         }
372
373         fPreprocessorMap.Add(new TObjString(detName), preprocessor);
374 }
375 //______________________________________________________________________________________________
376 Bool_t AliShuttle::Store(const AliCDBPath& path, TObject* object,
377                 AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite)
378 {
379         // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for
380         // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored
381         // using this function. Use StoreReferenceData instead!
382         // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
383         // finishes the data are transferred to the main storage (Grid).
384
385         return StoreLocally(fgkLocalCDB, path, object, metaData, validityStart, validityInfinite);
386 }
387
388 //______________________________________________________________________________________________
389 Bool_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData)
390 {
391         // Stores a CDB object in the storage for reference data. This objects will not be available during
392         // offline reconstrunction. Use this function for reference data only!
393         // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
394         // finishes the data are transferred to the main storage (Grid).
395
396         return StoreLocally(fgkLocalRefStorage, path, object, metaData);
397 }
398
399 //______________________________________________________________________________________________
400 Bool_t AliShuttle::StoreLocally(const TString& localUri,
401                         const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
402                         Int_t validityStart, Bool_t validityInfinite)
403 {
404         // Store object temporarily in local storage. Parameters are passed by Store and StoreReferenceData functions.
405         // when the preprocessor finishes the data are transferred to the main storage (Grid).
406         // The parameters are:
407         //   1) Uri of the backup storage (Local)
408         //   2) the object's path.
409         //   3) the object to be stored
410         //   4) the metaData to be associated with the object
411         //   5) the validity start run number w.r.t. the current run,
412         //      if the data is valid only for this run leave the default 0
413         //   6) specifies if the calibration data is valid for infinity (this means until updated),
414         //      typical for calibration runs, the default is kFALSE
415         //
416         // returns 0 if fail, 1 otherwise
417
418         if (fTestMode & kErrorStorage)
419         {
420                 Log(fCurrentDetector, "StoreLocally - In TESTMODE - Simulating error while storing locally");
421                 return kFALSE;
422         }
423         
424         const char* cdbType = (localUri == fgkLocalCDB) ? "CDB" : "Reference";
425
426         Int_t firstRun = GetCurrentRun() - validityStart;
427         if(firstRun < 0) {
428                 AliWarning("First valid run happens to be less than 0! Setting it to 0.");
429                 firstRun=0;
430         }
431
432         Int_t lastRun = -1;
433         if(validityInfinite) {
434                 lastRun = AliCDBRunRange::Infinity();
435         } else {
436                 lastRun = GetCurrentRun();
437         }
438
439         // Version is set to current run, it will be used later to transfer data to Grid
440         AliCDBId id(path, firstRun, lastRun, GetCurrentRun(), -1);
441
442         if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){
443                 TObjString runUsed = Form("%d", GetCurrentRun());
444                 metaData->SetProperty("RunUsed(TObjString)", runUsed.Clone());
445         }
446
447         Bool_t result = kFALSE;
448
449         if (!(AliCDBManager::Instance()->GetStorage(localUri))) {
450                 Log("SHUTTLE", Form("StoreLocally - Cannot activate local %s storage", cdbType));
451         } else {
452                 result = AliCDBManager::Instance()->GetStorage(localUri)
453                                         ->Put(object, id, metaData);
454         }
455
456         if(!result) {
457
458                 Log(fCurrentDetector, Form("StoreLocally - Can't store object <%s>!", id.ToString().Data()));
459         }
460
461         return result;
462 }
463
464 //______________________________________________________________________________________________
465 Bool_t AliShuttle::StoreOCDB()
466 {
467         //
468         // Called when preprocessor ends successfully or when previous storage attempt failed (kStoreError status)
469         // Calls underlying StoreOCDB(const char*) function twice, for OCDB and Reference storage.
470         // Then calls StoreRefFilesToGrid to store reference files. 
471         //
472         
473         if (fTestMode & kErrorGrid)
474         {
475                 Log("SHUTTLE", "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
476                 Log(fCurrentDetector, "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
477                 return kFALSE;
478         }
479         
480         Log("SHUTTLE","Storing OCDB data ...");
481         Bool_t resultCDB = StoreOCDB(fgkMainCDB);
482
483         Log("SHUTTLE","Storing reference data ...");
484         Bool_t resultRef = StoreOCDB(fgkMainRefStorage);
485         
486         Log("SHUTTLE","Storing reference files ...");
487         Bool_t resultRefFiles = StoreRefFilesToGrid();
488         
489         return resultCDB && resultRef && resultRefFiles;
490 }
491
492 //______________________________________________________________________________________________
493 Bool_t AliShuttle::StoreOCDB(const TString& gridURI)
494 {
495         //
496         // Called by StoreOCDB(), performs actual storage to the main OCDB and reference storages (Grid)
497         //
498
499         TObjArray* gridIds=0;
500
501         Bool_t result = kTRUE;
502
503         const char* type = 0;
504         TString localURI;
505         if(gridURI == fgkMainCDB) {
506                 type = "OCDB";
507                 localURI = fgkLocalCDB;
508         } else if(gridURI == fgkMainRefStorage) {
509                 type = "reference";
510                 localURI = fgkLocalRefStorage;
511         } else {
512                 AliError(Form("Invalid storage URI: %s", gridURI.Data()));
513                 return kFALSE;
514         }
515
516         AliCDBManager* man = AliCDBManager::Instance();
517
518         AliCDBStorage *gridSto = man->GetStorage(gridURI);
519         if(!gridSto) {
520                 Log("SHUTTLE",
521                         Form("StoreOCDB - cannot activate main %s storage", type));
522                 return kFALSE;
523         }
524
525         gridIds = gridSto->GetQueryCDBList();
526
527         // get objects previously stored in local CDB
528         AliCDBStorage *localSto = man->GetStorage(localURI);
529         if(!localSto) {
530                 Log("SHUTTLE",
531                         Form("StoreOCDB - cannot activate local %s storage", type));
532                 return kFALSE;
533         }
534         AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*");
535         // Local objects were stored with current run as Grid version!
536         TList* localEntries = localSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun());
537         localEntries->SetOwner(1);
538
539         // loop on local stored objects
540         TIter localIter(localEntries);
541         AliCDBEntry *aLocEntry = 0;
542         while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){
543                 aLocEntry->SetOwner(1);
544                 AliCDBId aLocId = aLocEntry->GetId();
545                 aLocEntry->SetVersion(-1);
546                 aLocEntry->SetSubVersion(-1);
547
548                 // If local object is valid up to infinity we store it only if it is
549                 // the first unprocessed run!
550                 if (aLocId.GetLastRun() == AliCDBRunRange::Infinity() &&
551                         !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
552                 {
553                         Log("SHUTTLE", Form("StoreOCDB - %s: object %s has validity infinite but "
554                                                 "there are previous unprocessed runs!",
555                                                 fCurrentDetector.Data(), aLocId.GetPath().Data()));
556                         continue;
557                 }
558
559                 // loop on Grid valid Id's
560                 Bool_t store = kTRUE;
561                 TIter gridIter(gridIds);
562                 AliCDBId* aGridId = 0;
563                 while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
564                         if(aGridId->GetPath() != aLocId.GetPath()) continue;
565                         // skip all objects valid up to infinity
566                         if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
567                         // if we get here, it means there's already some more recent object stored on Grid!
568                         store = kFALSE;
569                         break;
570                 }
571
572                 // If we get here, the file can be stored!
573                 Bool_t storeOk = gridSto->Put(aLocEntry);
574                 if(!store || storeOk){
575
576                         if (!store)
577                         {
578                                 Log(fCurrentDetector.Data(),
579                                         Form("StoreOCDB - A more recent object already exists in %s storage: <%s>",
580                                                 type, aGridId->ToString().Data()));
581                         } else {
582                                 Log("SHUTTLE",
583                                         Form("StoreOCDB - Object <%s> successfully put into %s storage",
584                                                 aLocId.ToString().Data(), type));
585                                 Log(fCurrentDetector.Data(),
586                                         Form("StoreOCDB - Object <%s> successfully put into %s storage",
587                                                 aLocId.ToString().Data(), type));
588                         }
589
590                         // removing local filename...
591                         TString filename;
592                         localSto->IdToFilename(aLocId, filename);
593                         AliInfo(Form("Removing local file %s", filename.Data()));
594                         RemoveFile(filename.Data());
595                         continue;
596                 } else  {
597                         Log("SHUTTLE",
598                                 Form("StoreOCDB - Grid %s storage of object <%s> failed",
599                                         type, aLocId.ToString().Data()));
600                         Log(fCurrentDetector.Data(),
601                                 Form("StoreOCDB - Grid %s storage of object <%s> failed",
602                                         type, aLocId.ToString().Data()));
603                         result = kFALSE;
604                 }
605         }
606         localEntries->Clear();
607
608         return result;
609 }
610
611 //______________________________________________________________________________________________
612 Bool_t AliShuttle::CleanReferenceStorage(const char* detector)
613 {
614         // clears the directory used to store reference files of a given subdetector
615   
616         AliCDBManager* man = AliCDBManager::Instance();
617         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
618         TString localBaseFolder = sto->GetBaseFolder();
619
620         TString targetDir = GetRefFilePrefix(localBaseFolder.Data(), detector);
621         
622         Log("SHUTTLE", Form("Cleaning %s", targetDir.Data()));
623
624         TString begin;
625         begin.Form("%d_", GetCurrentRun());
626         
627         TSystemDirectory* baseDir = new TSystemDirectory("/", targetDir);
628         if (!baseDir)
629                 return kTRUE;
630                 
631         TList* dirList = baseDir->GetListOfFiles();
632         delete baseDir;
633         
634         if (!dirList) return kTRUE;
635                         
636         if (dirList->GetEntries() < 3) 
637         {
638                 delete dirList;
639                 return kTRUE;
640         }
641                                 
642         Int_t nDirs = 0, nDel = 0;
643         TIter dirIter(dirList);
644         TSystemFile* entry = 0;
645
646         Bool_t success = kTRUE;
647         
648         while ((entry = dynamic_cast<TSystemFile*> (dirIter.Next())))
649         {                                       
650                 if (entry->IsDirectory())
651                         continue;
652                 
653                 TString fileName(entry->GetName());
654                 if (!fileName.BeginsWith(begin))
655                         continue;
656                         
657                 nDirs++;
658                                                 
659                 // delete file
660                 Int_t result = gSystem->Unlink(fileName.Data());
661                 
662                 if (result)
663                 {
664                         Log("SHUTTLE", Form("Could not delete file %s!", fileName.Data()));
665                         success = kFALSE;
666                 } else {
667                         nDel++;
668                 }
669         }
670
671         if(nDirs > 0)
672                 Log("SHUTTLE", Form("CleanReferenceStorage - %d (over %d) reference files in folder %s were deleted.", 
673                         nDel, nDirs, targetDir.Data()));
674
675                 
676         delete dirList;
677         return success;
678
679
680
681
682
683
684   Int_t result = gSystem->GetPathInfo(targetDir, 0, (Long64_t*) 0, 0, 0);
685   if (result == 0)
686   {
687     // delete directory
688     result = gSystem->Exec(Form("rm -r %s", targetDir.Data()));
689     if (result != 0)
690     {  
691       Log("SHUTTLE", Form("StoreReferenceFile - Could not clear directory %s", targetDir.Data()));
692       return kFALSE;
693     }
694   }
695
696   result = gSystem->mkdir(targetDir, kTRUE);
697   if (result != 0)
698   {
699     Log("SHUTTLE", Form("StoreReferenceFile - Error creating base directory %s", targetDir.Data()));
700     return kFALSE;
701   }
702         
703   return kTRUE;
704 }
705
706 //______________________________________________________________________________________________
707 Bool_t AliShuttle::StoreReferenceFile(const char* detector, const char* localFile, const char* gridFileName)
708 {
709         //
710         // Stores reference file directly (without opening it). This function stores the file locally.
711         //
712         // The file is stored under the following location: 
713         // <base folder of local reference storage>/<DET>/<RUN#>_<gridFileName>
714         // where <gridFileName> is the second parameter given to the function
715         // 
716         
717         if (fTestMode & kErrorStorage)
718         {
719                 Log(fCurrentDetector, "StoreReferenceFile - In TESTMODE - Simulating error while storing locally");
720                 return kFALSE;
721         }
722         
723         AliCDBManager* man = AliCDBManager::Instance();
724         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
725         
726         TString localBaseFolder = sto->GetBaseFolder();
727         
728         TString targetDir = GetRefFilePrefix(localBaseFolder.Data(), detector); 
729         
730         //try to open folder, if does not exist
731         void* dir = gSystem->OpenDirectory(targetDir.Data());
732         if (dir == NULL) {
733                 if (gSystem->mkdir(targetDir.Data(), kTRUE)) {
734                         Log("SHUTTLE", Form("Can't open directory <%s>", targetDir.Data()));
735                         return kFALSE;
736                 }
737
738         } else {
739                 gSystem->FreeDirectory(dir);
740         }
741
742         TString target;
743         target.Form("%s/%d_%s", targetDir.Data(), GetCurrentRun(), gridFileName);
744         
745         Int_t result = gSystem->GetPathInfo(localFile, 0, (Long64_t*) 0, 0, 0);
746         if (result)
747         {
748                 Log("SHUTTLE", Form("StoreReferenceFile - %s does not exist", localFile));
749                 return kFALSE;
750         }
751
752         result = gSystem->CopyFile(localFile, target);
753
754         if (result == 0)
755         {
756                 Log("SHUTTLE", Form("StoreReferenceFile - File %s stored locally to %s", localFile, target.Data()));
757                 return kTRUE;
758         }
759         else
760         {
761                 Log("SHUTTLE", Form("StoreReferenceFile - Could not store file %s to %s!. Error code = %d", 
762                                 localFile, target.Data(), result));
763                 return kFALSE;
764         }       
765 }
766
767 //______________________________________________________________________________________________
768 Bool_t AliShuttle::StoreRefFilesToGrid()
769 {
770         //
771         // Transfers the reference file to the Grid.
772         //
773         // The files are stored under the following location: 
774         // <base folder of reference storage>/<DET>/<RUN#>_<gridFileName>
775         //
776         
777         AliCDBManager* man = AliCDBManager::Instance();
778         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
779         if (!sto)
780                 return kFALSE;
781         TString localBaseFolder = sto->GetBaseFolder();
782                 
783         TString dir = GetRefFilePrefix(localBaseFolder.Data(), fCurrentDetector.Data());
784                 
785         AliCDBStorage* gridSto = man->GetStorage(fgkMainRefStorage);
786         if (!gridSto)
787                 return kFALSE;
788         
789         TString gridBaseFolder = gridSto->GetBaseFolder();
790
791         TString alienDir = GetRefFilePrefix(gridBaseFolder.Data(), fCurrentDetector.Data());
792         
793         TString begin;
794         begin.Form("%d_", GetCurrentRun());
795         
796         TSystemDirectory* baseDir = new TSystemDirectory("/", dir);
797         if (!baseDir)
798                 return kTRUE;
799                 
800         TList* dirList = baseDir->GetListOfFiles();
801         delete baseDir;
802         
803         if (!dirList) return kTRUE;
804                 
805         if (dirList->GetEntries() < 3) 
806         {
807                 delete dirList;
808                 return kTRUE;
809         }
810                         
811         if (!gGrid)
812         { 
813                 Log("SHUTTLE", "Connection to Grid failed: Cannot continue!");
814                 delete dirList;
815                 return kFALSE;
816         }
817         
818         Int_t nDirs = 0, nTransfer = 0;
819         TIter dirIter(dirList);
820         TSystemFile* entry = 0;
821
822         Bool_t success = kTRUE;
823         Bool_t first = kTRUE;
824         
825         while ((entry = dynamic_cast<TSystemFile*> (dirIter.Next())))
826         {                       
827                 if (entry->IsDirectory())
828                         continue;
829                         
830                 TString fileName(entry->GetName());
831                 if (!fileName.BeginsWith(begin))
832                         continue;
833                         
834                 nDirs++;
835                         
836                 if (first)
837                 {
838                         first = kFALSE;
839                         // check that DET folder exists, otherwise create it
840                         TGridResult* result = gGrid->Ls(alienDir.Data(), "a");
841                         
842                         if (!result)
843                         {
844                                 delete dirList;
845                                 return kFALSE;
846                         }
847                         
848                         if (!result->GetFileName(1)) // TODO: It looks like element 0 is always 0!!
849                         {
850                                 if (!gGrid->Mkdir(alienDir.Data(),"",0))
851                                 {
852                                         Log("SHUTTLE", Form("StoreRefFilesToGrid - Cannot create directory %s",
853                                                         alienDir.Data()));
854                                         delete dirList;
855                                         return kFALSE;
856                                 } else {
857                                         Log("SHUTTLE",Form("Folder %s created", alienDir.Data()));
858                                 }
859                                 
860                         } else {
861                                         Log("SHUTTLE",Form("Folder %s found", alienDir.Data()));
862                         }
863                 }
864                         
865                 TString fullLocalPath;
866                 fullLocalPath.Form("%s/%s", dir.Data(), fileName.Data());
867                 
868                 TString fullGridPath;
869                 fullGridPath.Form("alien://%s/%s", alienDir.Data(), fileName.Data());
870
871                 TFileMerger fileMerger;
872                 Bool_t result = TFile::Cp(fullLocalPath, fullGridPath);
873                 
874                 if (result)
875                 {
876                         Log("SHUTTLE", Form("StoreRefFilesToGrid - Copying local file %s to %s succeeded!", fullLocalPath.Data(), fullGridPath.Data()));
877                         RemoveFile(fullLocalPath);
878                         nTransfer++;
879                 }
880                 else
881                 {
882                         Log("SHUTTLE", Form("StoreRefFilesToGrid - Copying local file %s to %s FAILED!", fullLocalPath.Data(), fullGridPath.Data()));
883                         success = kFALSE;
884                 }
885         }
886
887         Log("SHUTTLE", Form("StoreRefFilesToGrid - %d (over %d) reference files in folder %s copied to Grid.", nTransfer, nDirs, dir.Data()));
888
889                 
890         delete dirList;
891         return success;
892 }
893
894 //______________________________________________________________________________________________
895 const char* AliShuttle::GetRefFilePrefix(const char* base, const char* detector)
896 {
897         //
898         // Get folder name of reference files 
899         //
900
901         TString offDetStr(GetOfflineDetName(detector));
902         TString dir;
903         if (offDetStr == "ITS" || offDetStr == "MUON" || offDetStr == "PHOS")
904         {
905                 dir.Form("%s/%s/%s", base, offDetStr.Data(), detector);
906         } else {
907                 dir.Form("%s/%s", base, offDetStr.Data());
908         }
909         
910         return dir.Data();
911         
912
913 }
914 //______________________________________________________________________________________________
915 void AliShuttle::CleanLocalStorage(const TString& uri)
916 {
917         //
918         // Called in case the preprocessor is declared failed. Remove remaining objects from the local storages.
919         //
920
921         const char* type = 0;
922         if(uri == fgkLocalCDB) {
923                 type = "OCDB";
924         } else if(uri == fgkLocalRefStorage) {
925                 type = "Reference";
926         } else {
927                 AliError(Form("Invalid storage URI: %s", uri.Data()));
928                 return;
929         }
930
931         AliCDBManager* man = AliCDBManager::Instance();
932
933         // open local storage
934         AliCDBStorage *localSto = man->GetStorage(uri);
935         if(!localSto) {
936                 Log("SHUTTLE",
937                         Form("CleanLocalStorage - cannot activate local %s storage", type));
938                 return;
939         }
940
941         TString filename(Form("%s/%s/*/Run*_v%d_s*.root",
942                 localSto->GetBaseFolder().Data(), GetOfflineDetName(fCurrentDetector.Data()), GetCurrentRun()));
943
944         AliInfo(Form("filename = %s", filename.Data()));
945
946         AliInfo(Form("Removing remaining local files from run %d and detector %s ...",
947                 GetCurrentRun(), fCurrentDetector.Data()));
948
949         RemoveFile(filename.Data());
950
951 }
952
953 //______________________________________________________________________________________________
954 void AliShuttle::RemoveFile(const char* filename)
955 {
956         //
957         // removes local file
958         //
959
960         TString command(Form("rm -f %s", filename));
961
962         Int_t result = gSystem->Exec(command.Data());
963         if(result != 0)
964         {
965                 Log("SHUTTLE", Form("RemoveFile - %s: Cannot remove file %s!",
966                         fCurrentDetector.Data(), filename));
967         }
968 }
969
970 //______________________________________________________________________________________________
971 AliShuttleStatus* AliShuttle::ReadShuttleStatus()
972 {
973         //
974         // Reads the AliShuttleStatus from the CDB
975         //
976
977         if (fStatusEntry){
978                 delete fStatusEntry;
979                 fStatusEntry = 0;
980         }
981
982         fStatusEntry = AliCDBManager::Instance()->GetStorage(GetLocalCDB())
983                 ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
984
985         if (!fStatusEntry) return 0;
986         fStatusEntry->SetOwner(1);
987
988         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
989         if (!status) {
990                 AliError("Invalid object stored to CDB!");
991                 return 0;
992         }
993
994         return status;
995 }
996
997 //______________________________________________________________________________________________
998 Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status)
999 {
1000         //
1001         // writes the status for one subdetector
1002         //
1003
1004         if (fStatusEntry){
1005                 delete fStatusEntry;
1006                 fStatusEntry = 0;
1007         }
1008
1009         Int_t run = GetCurrentRun();
1010
1011         AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
1012
1013         fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
1014         fStatusEntry->SetOwner(1);
1015
1016         UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
1017
1018         if (!result) {
1019                 Log("SHUTTLE", Form("WriteShuttleStatus - Failed for %s, run %d",
1020                                                 fCurrentDetector.Data(), run));
1021                 return kFALSE;
1022         }
1023         
1024         SendMLInfo();
1025
1026         return kTRUE;
1027 }
1028
1029 //______________________________________________________________________________________________
1030 void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount)
1031 {
1032         //
1033         // changes the AliShuttleStatus for the given detector and run to the given status
1034         //
1035
1036         if (!fStatusEntry){
1037                 AliError("UNEXPECTED: fStatusEntry empty");
1038                 return;
1039         }
1040
1041         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1042
1043         if (!status){
1044                 Log("SHUTTLE", "UNEXPECTED: status could not be read from current CDB entry");
1045                 return;
1046         }
1047
1048         TString actionStr = Form("UpdateShuttleStatus - %s: Changing state from %s to %s",
1049                                 fCurrentDetector.Data(),
1050                                 status->GetStatusName(),
1051                                 status->GetStatusName(newStatus));
1052         Log("SHUTTLE", actionStr);
1053         SetLastAction(actionStr);
1054
1055         status->SetStatus(newStatus);
1056         if (increaseCount) status->IncreaseCount();
1057
1058         AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
1059
1060         SendMLInfo();
1061 }
1062
1063 //______________________________________________________________________________________________
1064 void AliShuttle::SendMLInfo()
1065 {
1066         //
1067         // sends ML information about the current status of the current detector being processed
1068         //
1069         
1070         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1071         
1072         if (!status){
1073                 Log("SHUTTLE", "SendMLInfo - UNEXPECTED: status could not be read from current CDB entry");
1074                 return;
1075         }
1076         
1077         TMonaLisaText  mlStatus(Form("%s_status", fCurrentDetector.Data()), status->GetStatusName());
1078         TMonaLisaValue mlRetryCount(Form("%s_count", fCurrentDetector.Data()), status->GetCount());
1079
1080         TList mlList;
1081         mlList.Add(&mlStatus);
1082         mlList.Add(&mlRetryCount);
1083
1084         fMonaLisa->SendParameters(&mlList);
1085 }
1086
1087 //______________________________________________________________________________________________
1088 Bool_t AliShuttle::ContinueProcessing()
1089 {
1090         // this function reads the AliShuttleStatus information from CDB and
1091         // checks if the processing should be continued
1092         // if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
1093
1094         if (!fConfig->HostProcessDetector(fCurrentDetector)) return kFALSE;
1095
1096         AliPreprocessor* aPreprocessor =
1097                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
1098         if (!aPreprocessor)
1099         {
1100                 AliInfo(Form("%s: no preprocessor registered", fCurrentDetector.Data()));
1101                 return kFALSE;
1102         }
1103
1104         AliShuttleLogbookEntry::Status entryStatus =
1105                 fLogbookEntry->GetDetectorStatus(fCurrentDetector);
1106
1107         if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
1108                 AliInfo(Form("ContinueProcessing - %s is %s",
1109                                 fCurrentDetector.Data(),
1110                                 fLogbookEntry->GetDetectorStatusName(entryStatus)));
1111                 return kFALSE;
1112         }
1113
1114         // if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state
1115
1116         // check if current run is first unprocessed run for current detector
1117         if (fConfig->StrictRunOrder(fCurrentDetector) &&
1118                 !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
1119         {
1120                 if (fTestMode == kNone)
1121                 {
1122                         Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering but this is not the first unprocessed run!"));
1123                         return kFALSE;
1124                 }
1125                 else
1126                 {
1127                         Log("SHUTTLE", Form("ContinueProcessing - In TESTMODE - Although %s requires strict run ordering and this is not the first unprocessed run, the SHUTTLE continues"));
1128                 }
1129         }
1130
1131         AliShuttleStatus* status = ReadShuttleStatus();
1132         if (!status) {
1133                 // first time
1134                 Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
1135                                 fCurrentDetector.Data()));
1136                 status = new AliShuttleStatus(AliShuttleStatus::kStarted);
1137                 return WriteShuttleStatus(status);
1138         }
1139
1140         // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
1141         // If it happens it may mean Logbook updating failed... let's do it now!
1142         if (status->GetStatus() == AliShuttleStatus::kDone ||
1143             status->GetStatus() == AliShuttleStatus::kFailed){
1144                 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
1145                                         fCurrentDetector.Data(),
1146                                         status->GetStatusName(status->GetStatus())));
1147                 UpdateShuttleLogbook(fCurrentDetector.Data(),
1148                                         status->GetStatusName(status->GetStatus()));
1149                 return kFALSE;
1150         }
1151
1152         if (status->GetStatus() == AliShuttleStatus::kStoreError) {
1153                 Log("SHUTTLE",
1154                         Form("ContinueProcessing - %s: Grid storage of one or more objects failed. Trying again now",
1155                                 fCurrentDetector.Data()));
1156                 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
1157                 if (StoreOCDB()){
1158                         Log("SHUTTLE", Form("ContinueProcessing - %s: all objects successfully stored into main storage",
1159                                 fCurrentDetector.Data()));
1160                         UpdateShuttleStatus(AliShuttleStatus::kDone);
1161                         UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE");
1162                 } else {
1163                         Log("SHUTTLE",
1164                                 Form("ContinueProcessing - %s: Grid storage failed again",
1165                                         fCurrentDetector.Data()));
1166                         UpdateShuttleStatus(AliShuttleStatus::kStoreError);
1167                 }
1168                 return kFALSE;
1169         }
1170
1171         // if we get here, there is a restart
1172         Bool_t cont = kFALSE;
1173
1174         // abort conditions
1175         if (status->GetCount() >= fConfig->GetMaxRetries()) {
1176                 Log("SHUTTLE", Form("ContinueProcessing - %s failed %d times in status %s - "
1177                                 "Updating Shuttle Logbook", fCurrentDetector.Data(),
1178                                 status->GetCount(), status->GetStatusName()));
1179                 UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
1180                 UpdateShuttleStatus(AliShuttleStatus::kFailed);
1181
1182                 // there may still be objects in local OCDB and reference storage
1183                 // and FXS databases may be not updated: do it now!
1184                 
1185                 // TODO Currently disabled, we want to keep files in case of failure!
1186                 // CleanLocalStorage(fgkLocalCDB);
1187                 // CleanLocalStorage(fgkLocalRefStorage);
1188                 // UpdateTableFailCase();
1189                 
1190                 // Send mail to detector expert!
1191                 AliInfo(Form("Sending mail to %s expert...", fCurrentDetector.Data()));
1192                 if (!SendMail())
1193                         Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert",
1194                                         fCurrentDetector.Data()));
1195
1196         } else {
1197                 Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. "
1198                                 "Aborted before with %s. Retry number %d.", fCurrentDetector.Data(),
1199                                 status->GetStatusName(), status->GetCount()));
1200                 Bool_t increaseCount = kTRUE;
1201                 if (status->GetStatus() == AliShuttleStatus::kDCSError || status->GetStatus() == AliShuttleStatus::kDCSStarted)
1202                         increaseCount = kFALSE;
1203                 UpdateShuttleStatus(AliShuttleStatus::kStarted, increaseCount);
1204                 cont = kTRUE;
1205         }
1206
1207         return cont;
1208 }
1209
1210 //______________________________________________________________________________________________
1211 Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
1212 {
1213         //
1214         // Makes data retrieval for all detectors in the configuration.
1215         // entry: Shuttle logbook entry, contains run paramenters and status of detectors
1216         // (Unprocessed, Inactive, Failed or Done).
1217         // Returns kFALSE in case of error occured and kTRUE otherwise
1218         //
1219
1220         if (!entry) return kFALSE;
1221
1222         fLogbookEntry = entry;
1223
1224         AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^* \n",
1225                                         GetCurrentRun()));
1226
1227         // create ML instance that monitors this run
1228         fMonaLisa = new TMonaLisaWriter(Form("%d", GetCurrentRun()), "SHUTTLE", "aliendb1.cern.ch");
1229         // disable monitoring of other parameters that come e.g. from TFile
1230         gMonitoringWriter = 0;
1231
1232         // Send the information to ML
1233         TMonaLisaText  mlStatus("SHUTTLE_status", "Processing");
1234         TMonaLisaText  mlRunType("SHUTTLE_runtype", Form("%s (%s)", entry->GetRunType(), entry->GetRunParameter("log")));
1235
1236         TList mlList;
1237         mlList.Add(&mlStatus);
1238         mlList.Add(&mlRunType);
1239
1240         fMonaLisa->SendParameters(&mlList);
1241
1242         if (fLogbookEntry->IsDone())
1243         {
1244                 Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook");
1245                 UpdateShuttleLogbook("shuttle_done");
1246                 fLogbookEntry = 0;
1247                 return kTRUE;
1248         }
1249
1250         // read test mode if flag is set
1251         if (fReadTestMode)
1252         {
1253                 fTestMode = kNone;
1254                 TString logEntry(entry->GetRunParameter("log"));
1255                 //printf("log entry = %s\n", logEntry.Data());
1256                 TString searchStr("Testmode: ");
1257                 Int_t pos = logEntry.Index(searchStr.Data());
1258                 //printf("%d\n", pos);
1259                 if (pos >= 0)
1260                 {
1261                         TSubString subStr = logEntry(pos + searchStr.Length(), logEntry.Length());
1262                         //printf("%s\n", subStr.String().Data());
1263                         TString newStr(subStr.Data());
1264                         TObjArray* token = newStr.Tokenize(' ');
1265                         if (token)
1266                         {
1267                                 //token->Print();
1268                                 TObjString* tmpStr = dynamic_cast<TObjString*> (token->First());
1269                                 if (tmpStr)
1270                                 {
1271                                         Int_t testMode = tmpStr->String().Atoi();
1272                                         if (testMode > 0)
1273                                         {
1274                                                 Log("SHUTTLE", Form("Enabling test mode %d", testMode));
1275                                                 SetTestMode((TestMode) testMode);
1276                                         }
1277                                 }
1278                                 delete token;          
1279                         }
1280                 }
1281         }
1282         
1283         Log("SHUTTLE", Form("The test mode flag is %d", (Int_t) fTestMode));
1284         
1285         fLogbookEntry->Print("all");
1286
1287         // Initialization
1288         Bool_t hasError = kFALSE;
1289
1290         AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
1291         if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
1292         AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
1293         if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
1294
1295         // Loop on detectors in the configuration
1296         TIter iter(fConfig->GetDetectors());
1297         TObjString* aDetector = 0;
1298
1299         while ((aDetector = (TObjString*) iter.Next()))
1300         {
1301                 fCurrentDetector = aDetector->String();
1302
1303                 if (ContinueProcessing() == kFALSE) continue;
1304
1305                 AliInfo(Form("\n\n \t\t\t****** run %d - %s: START  ******",
1306                                                 GetCurrentRun(), aDetector->GetName()));
1307
1308                 for(Int_t iSys=0;iSys<3;iSys++) fFXSCalled[iSys]=kFALSE;
1309
1310                 Log(fCurrentDetector.Data(), "Starting processing");
1311
1312                 Int_t pid = fork();
1313
1314                 if (pid < 0)
1315                 {
1316                         Log("SHUTTLE", "ERROR: Forking failed");
1317                 }
1318                 else if (pid > 0)
1319                 {
1320                         // parent
1321                         AliInfo(Form("In parent process of %d - %s: Starting monitoring",
1322                                                         GetCurrentRun(), aDetector->GetName()));
1323
1324                         Long_t begin = time(0);
1325
1326                         int status; // to be used with waitpid, on purpose an int (not Int_t)!
1327                         while (waitpid(pid, &status, WNOHANG) == 0)
1328                         {
1329                                 Long_t expiredTime = time(0) - begin;
1330
1331                                 if (expiredTime > fConfig->GetPPTimeOut())
1332                                 {
1333                                         TString tmp;
1334                                         tmp.Form("Process of %s time out. Run time: %d seconds. Killing...",
1335                                                                 fCurrentDetector.Data(), expiredTime);
1336                                         Log("SHUTTLE", tmp);
1337                                         Log(fCurrentDetector, tmp);
1338
1339                                         kill(pid, 9);
1340
1341                                         UpdateShuttleStatus(AliShuttleStatus::kPPTimeOut);
1342                                         hasError = kTRUE;
1343
1344                                         gSystem->Sleep(1000);
1345                                 }
1346                                 else
1347                                 {
1348                                         gSystem->Sleep(1000);
1349                                         
1350                                         TString checkStr;
1351                                         checkStr.Form("ps -o vsize --pid %d | tail -n 1", pid);
1352                                         FILE* pipe = gSystem->OpenPipe(checkStr, "r");
1353                                         if (!pipe)
1354                                         {
1355                                                 Log("SHUTTLE", Form("Error: Could not open pipe to %s", checkStr.Data()));
1356                                                 continue;
1357                                         }
1358                                                 
1359                                         char buffer[100];
1360                                         if (!fgets(buffer, 100, pipe))
1361                                         {
1362                                                 Log("SHUTTLE", "Error: ps did not return anything");
1363                                                 gSystem->ClosePipe(pipe);
1364                                                 continue;
1365                                         }
1366                                         gSystem->ClosePipe(pipe);
1367                                         
1368                                         //Log("SHUTTLE", Form("ps returned %s", buffer));
1369                                         
1370                                         Int_t mem = 0;
1371                                         if ((sscanf(buffer, "%d\n", &mem) != 1) || !mem)
1372                                         {
1373                                                 Log("SHUTTLE", "Error: Could not parse output of ps");
1374                                                 continue;
1375                                         }
1376                                         
1377                                         if (expiredTime % 60 == 0)
1378                                                 Log("SHUTTLE", Form("%s: Checking process. Run time: %d seconds - Memory consumption: %d KB",
1379                                                                 fCurrentDetector.Data(), expiredTime, mem));
1380                                         
1381                                         if (mem > fConfig->GetPPMaxMem())
1382                                         {
1383                                                 TString tmp;
1384                                                 tmp.Form("Process exceeds maximum allowed memory (%d KB > %d KB). Killing...",
1385                                                         mem, fConfig->GetPPMaxMem());
1386                                                 Log("SHUTTLE", tmp);
1387                                                 Log(fCurrentDetector, tmp);
1388         
1389                                                 kill(pid, 9);
1390         
1391                                                 UpdateShuttleStatus(AliShuttleStatus::kPPOutOfMemory);
1392                                                 hasError = kTRUE;
1393         
1394                                                 gSystem->Sleep(1000);
1395                                         }
1396                                 }
1397                         }
1398
1399                         AliInfo(Form("In parent process of %d - %s: Client has terminated.",
1400                                                                 GetCurrentRun(), aDetector->GetName()));
1401
1402                         if (WIFEXITED(status))
1403                         {
1404                                 Int_t returnCode = WEXITSTATUS(status);
1405
1406                                 Log("SHUTTLE", Form("%s: the return code is %d", fCurrentDetector.Data(),
1407                                                                                 returnCode));
1408
1409                                 if (returnCode == 0) hasError = kTRUE;
1410                         }
1411                 }
1412                 else if (pid == 0)
1413                 {
1414                         // client
1415                         AliInfo(Form("In client process of %d - %s", GetCurrentRun(), aDetector->GetName()));
1416
1417                         AliInfo("Redirecting output...");
1418
1419                         if ((freopen(GetLogFileName(fCurrentDetector), "a", stdout)) == 0)
1420                         {
1421                                 Log("SHUTTLE", "Could not freopen stdout");
1422                         }
1423                         else
1424                         {
1425                                 fOutputRedirected = kTRUE;
1426                                 if ((dup2(fileno(stdout), fileno(stderr))) < 0)
1427                                         Log("SHUTTLE", "Could not redirect stderr");
1428                                 
1429                         }
1430                         
1431                         TString wd = gSystem->WorkingDirectory();
1432                         TString tmpDir = Form("%s/%s_process",GetShuttleTempDir(),fCurrentDetector.Data());
1433                         
1434                         gSystem->mkdir(tmpDir.Data());
1435                         gSystem->ChangeDirectory(tmpDir.Data());
1436                         
1437                         Bool_t success = ProcessCurrentDetector();
1438                         
1439                         gSystem->ChangeDirectory(wd.Data());
1440                         
1441                         gSystem->Exec(Form("rm -rf %s",tmpDir.Data()));
1442                         
1443                         if (success) // Preprocessor finished successfully!
1444                         { 
1445                                 // Update time_processed field in FXS DB
1446                                 if (UpdateTable() == kFALSE)
1447                                         Log("SHUTTLE", Form("Process - %s: Could not update FXS databases!", 
1448                                                         fCurrentDetector.Data()));
1449
1450                                 // Transfer the data from local storage to main storage (Grid)
1451                                 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
1452                                 if (StoreOCDB() == kFALSE)
1453                                 {
1454                                         AliInfo(Form("\n \t\t\t****** run %d - %s: STORAGE ERROR ****** \n\n",
1455                                                         GetCurrentRun(), aDetector->GetName()));
1456                                         UpdateShuttleStatus(AliShuttleStatus::kStoreError);
1457                                         success = kFALSE;
1458                                 } else {
1459                                         AliInfo(Form("\n \t\t\t****** run %d - %s: DONE ****** \n\n",
1460                                                         GetCurrentRun(), aDetector->GetName()));
1461                                         UpdateShuttleStatus(AliShuttleStatus::kDone);
1462                                         UpdateShuttleLogbook(fCurrentDetector, "DONE");
1463                                 }
1464                         }
1465
1466                         for (UInt_t iSys=0; iSys<3; iSys++)
1467                         {
1468                                 if (fFXSCalled[iSys]) fFXSlist[iSys].Clear();
1469                         }
1470
1471                         AliInfo(Form("Client process of %d - %s is exiting now with %d.",
1472                                                         GetCurrentRun(), aDetector->GetName(), success));
1473
1474                         // the client exits here
1475                         gSystem->Exit(success);
1476
1477                         AliError("We should never get here!!!");
1478                 }
1479         }
1480
1481         AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^* \n",
1482                                                         GetCurrentRun()));
1483
1484         //check if shuttle is done for this run, if so update logbook
1485         TObjArray checkEntryArray;
1486         checkEntryArray.SetOwner(1);
1487         TString whereClause = Form("where run=%d", GetCurrentRun());
1488         if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || checkEntryArray.GetEntries() == 0) {
1489                 Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!",
1490                                                 GetCurrentRun()));
1491                 return hasError == kFALSE;
1492         }
1493
1494         AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*>
1495                                                 (checkEntryArray.At(0));
1496
1497         if (checkEntry)
1498         {
1499                 if (checkEntry->IsDone())
1500                 {
1501                         Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook");
1502                         UpdateShuttleLogbook("shuttle_done");
1503                 }
1504                 else
1505                 {
1506                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1507                         {
1508                                 if (checkEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
1509                                 {
1510                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
1511                                                         checkEntry->GetRun(), GetDetName(iDet)));
1512                                         fFirstUnprocessed[iDet] = kFALSE;
1513                                 }
1514                         }
1515                 }
1516         }
1517
1518         // remove ML instance
1519         delete fMonaLisa;
1520         fMonaLisa = 0;
1521
1522         fLogbookEntry = 0;
1523
1524         return hasError == kFALSE;
1525 }
1526
1527 //______________________________________________________________________________________________
1528 Bool_t AliShuttle::ProcessCurrentDetector()
1529 {
1530         //
1531         // Makes data retrieval just for a specific detector (fCurrentDetector).
1532         // Threre should be a configuration for this detector.
1533
1534         AliInfo(Form("Retrieving values for %s, run %d", fCurrentDetector.Data(), GetCurrentRun()));
1535
1536         if (!CleanReferenceStorage(fCurrentDetector.Data()))
1537                 return kFALSE;
1538
1539         TMap* dcsMap = 0;
1540
1541         // call preprocessor
1542         AliPreprocessor* aPreprocessor =
1543                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
1544
1545         aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
1546
1547         Bool_t processDCS = aPreprocessor->ProcessDCS();
1548
1549         if (!processDCS)
1550         {
1551                 Log(fCurrentDetector, "The preprocessor requested to skip the retrieval of DCS values");
1552         }
1553         else if (fTestMode & kSkipDCS)
1554         {
1555                 Log(fCurrentDetector, "In TESTMODE - Skipping DCS processing!");
1556         } 
1557         else if (fTestMode & kErrorDCS)
1558         {
1559                 Log(fCurrentDetector, "In TESTMODE - Simulating DCS error");
1560                 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
1561                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1562                 return kFALSE;
1563         } else {
1564
1565                 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
1566
1567                 TString host(fConfig->GetDCSHost(fCurrentDetector));
1568                 Int_t port = fConfig->GetDCSPort(fCurrentDetector);
1569
1570                 if (fConfig->GetDCSAliases(fCurrentDetector)->GetEntries() > 0)
1571                 {
1572                         dcsMap = GetValueSet(host, port, fConfig->GetDCSAliases(fCurrentDetector), kAlias);
1573                         if (!dcsMap)
1574                         {
1575                                 Log(fCurrentDetector, "ProcessCurrentDetector - Error while retrieving DCS aliases");
1576                                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1577                                 return kFALSE;
1578                         }
1579                 }
1580                 
1581                 if (fConfig->GetDCSDataPoints(fCurrentDetector)->GetEntries() > 0)
1582                 {
1583                         TMap* dcsMap2 = GetValueSet(host, port, fConfig->GetDCSDataPoints(fCurrentDetector), kDP);
1584                         if (!dcsMap2)
1585                         {
1586                                 Log(fCurrentDetector, "ProcessCurrentDetector - Error while retrieving DCS data points");
1587                                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1588                                 if (dcsMap)
1589                                         delete dcsMap;
1590                                 return kFALSE;
1591                         }
1592                         
1593                         if (!dcsMap)
1594                         {
1595                                 dcsMap = dcsMap2;
1596                         }
1597                         else // merge
1598                         {
1599                                 TIter iter(dcsMap2);
1600                                 TObjString* key = 0;
1601                                 while ((key = (TObjString*) iter.Next()))
1602                                         dcsMap->Add(key, dcsMap2->GetValue(key->String()));
1603                                         
1604                                 dcsMap2->SetOwner(kFALSE);
1605                                 delete dcsMap2;
1606                         }
1607                 }
1608                 
1609         }
1610
1611         // still no map?
1612         if (!dcsMap)
1613                 dcsMap = new TMap;
1614         
1615         // DCS Archive DB processing successful. Call Preprocessor!
1616         UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
1617
1618         UInt_t returnValue = aPreprocessor->Process(dcsMap);
1619
1620         if (returnValue > 0) // Preprocessor error!
1621         {
1622                 Log(fCurrentDetector, Form("Preprocessor failed. Process returned %d.", returnValue));
1623                 UpdateShuttleStatus(AliShuttleStatus::kPPError);
1624                 dcsMap->DeleteAll();
1625                 delete dcsMap;
1626                 return kFALSE;
1627         }
1628         
1629         // preprocessor ok!
1630         UpdateShuttleStatus(AliShuttleStatus::kPPDone);
1631         Log(fCurrentDetector, Form("ProcessCurrentDetector - %s preprocessor returned success",
1632                                 fCurrentDetector.Data()));
1633
1634         dcsMap->DeleteAll();
1635         delete dcsMap;
1636
1637         return kTRUE;
1638 }
1639
1640 //______________________________________________________________________________________________
1641 Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
1642                 TObjArray& entries)
1643 {
1644         // Query DAQ's Shuttle logbook and fills detector status object.
1645         // Call QueryRunParameters to query DAQ logbook for run parameters.
1646         //
1647
1648         entries.SetOwner(1);
1649
1650         // check connection, in case connect
1651         if(!Connect(3)) return kFALSE;
1652
1653         TString sqlQuery;
1654         sqlQuery = Form("select * from %s %s order by run", fConfig->GetShuttlelbTable(), whereClause);
1655
1656         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
1657         if (!aResult) {
1658                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
1659                 return kFALSE;
1660         }
1661
1662         AliDebug(2,Form("Query = %s", sqlQuery.Data()));
1663
1664         if(aResult->GetRowCount() == 0) {
1665                 AliInfo("No entries in Shuttle Logbook match request");
1666                 delete aResult;
1667                 return kTRUE;
1668         }
1669
1670         // TODO Check field count!
1671         const UInt_t nCols = 23;
1672         if (aResult->GetFieldCount() != (Int_t) nCols) {
1673                 AliError("Invalid SQL result field number!");
1674                 delete aResult;
1675                 return kFALSE;
1676         }
1677
1678         TSQLRow* aRow;
1679         while ((aRow = aResult->Next())) {
1680                 TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
1681                 Int_t run = runString.Atoi();
1682
1683                 AliShuttleLogbookEntry *entry = QueryRunParameters(run);
1684                 if (!entry)
1685                         continue;
1686
1687                 // loop on detectors
1688                 for(UInt_t ii = 0; ii < nCols; ii++)
1689                         entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii));
1690
1691                 entries.AddLast(entry);
1692                 delete aRow;
1693         }
1694
1695         delete aResult;
1696         return kTRUE;
1697 }
1698
1699 //______________________________________________________________________________________________
1700 AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
1701 {
1702         //
1703         // Retrieve run parameters written in the DAQ logbook and sets them into AliShuttleLogbookEntry object
1704         //
1705
1706         // check connection, in case connect
1707         if (!Connect(3))
1708                 return 0;
1709
1710         TString sqlQuery;
1711         sqlQuery.Form("select * from %s where run=%d", fConfig->GetDAQlbTable(), run);
1712
1713         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
1714         if (!aResult) {
1715                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
1716                 return 0;
1717         }
1718
1719         if (aResult->GetRowCount() == 0) {
1720                 Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run));
1721                 delete aResult;
1722                 return 0;
1723         }
1724
1725         if (aResult->GetRowCount() > 1) {
1726                 AliError(Form("More than one entry in DAQ Logbook for run %d. Skipping", run));
1727                 delete aResult;
1728                 return 0;
1729         }
1730
1731         TSQLRow* aRow = aResult->Next();
1732         if (!aRow)
1733         {
1734                 AliError(Form("Could not retrieve row for run %d. Skipping", run));
1735                 delete aResult;
1736                 return 0;
1737         }
1738
1739         AliShuttleLogbookEntry* entry = new AliShuttleLogbookEntry(run);
1740
1741         for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
1742                 entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
1743
1744         UInt_t startTime = entry->GetStartTime();
1745         UInt_t endTime = entry->GetEndTime();
1746
1747         if (!startTime || !endTime || startTime > endTime) {
1748                 Log("SHUTTLE",
1749                         Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d",
1750                                 run, startTime, endTime));
1751                 delete entry;
1752                 delete aRow;
1753                 delete aResult;
1754                 return 0;
1755         }
1756
1757         delete aRow;
1758         delete aResult;
1759
1760         return entry;
1761 }
1762
1763 //______________________________________________________________________________________________
1764 Bool_t AliShuttle::GetValueSet(const char* host, Int_t port, const char* entry,
1765                                 TObjArray* valueSet, DCSType type)
1766 {
1767         // Retrieve all "entry" data points from the DCS server
1768         // host, port: TSocket connection parameters
1769         // entry: name of the alias or data point
1770         // valueSet: array of retrieved AliDCSValue's
1771         // type: kAlias or kDP
1772
1773         AliDCSClient client(host, port, fTimeout, fRetries);
1774         if (!client.IsConnected())
1775         {
1776                 return kFALSE;
1777         }
1778
1779         Int_t result=0;
1780
1781         if (type == kAlias)
1782         {
1783                 result = client.GetAliasValues(entry,
1784                         GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1785         } else
1786         if (type == kDP)
1787         {
1788                 result = client.GetDPValues(entry,
1789                         GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1790         }
1791
1792         if (result < 0)
1793         {
1794                 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get '%s'! Reason: %s",
1795                         entry, AliDCSClient::GetErrorString(result)));
1796
1797                 if (result == AliDCSClient::fgkServerError)
1798                 {
1799                         Log(fCurrentDetector.Data(), Form("GetValueSet - Server error: %s",
1800                                 client.GetServerError().Data()));
1801                 }
1802
1803                 return kFALSE;
1804         }
1805
1806         return kTRUE;
1807 }
1808
1809 //______________________________________________________________________________________________
1810 TMap* AliShuttle::GetValueSet(const char* host, Int_t port, const TSeqCollection* entries,
1811                               DCSType type)
1812 {
1813         // Retrieve all "entry" data points from the DCS server
1814         // host, port: TSocket connection parameters
1815         // entries: list of name of the alias or data point
1816         // type: kAlias or kDP
1817         // returns TMap of values, 0 when failure
1818
1819         AliDCSClient client(host, port, fTimeout, fRetries);
1820
1821         TMap* result = 0;
1822         if (type == kAlias)
1823         {
1824                 result = client.GetAliasValues(entries, GetCurrentStartTime(), 
1825                         GetCurrentEndTime());
1826         } 
1827         else if (type == kDP)
1828         {
1829                 result = client.GetDPValues(entries, GetCurrentStartTime(), 
1830                         GetCurrentEndTime());
1831         }
1832
1833         if (result == 0)
1834         {
1835                 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get entries! Reason: %s",
1836                         client.GetServerError().Data()));
1837
1838                 return 0;
1839         }
1840                 
1841         return result;
1842 }
1843
1844 //______________________________________________________________________________________________
1845 const char* AliShuttle::GetFile(Int_t system, const char* detector,
1846                 const char* id, const char* source)
1847 {
1848         // Get calibration file from file exchange servers
1849         // First queris the FXS database for the file name, using the run, detector, id and source info
1850         // then calls RetrieveFile(filename) for actual copy to local disk
1851         // run: current run being processed (given by Logbook entry fLogbookEntry)
1852         // detector: the Preprocessor name
1853         // id: provided as a parameter by the Preprocessor
1854         // source: provided by the Preprocessor through GetFileSources function
1855
1856         // check if test mode should simulate a FXS error
1857         if (fTestMode & kErrorFXSFiles)
1858         {
1859                 Log(detector, Form("GetFile - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
1860                 return 0;
1861         }
1862         
1863         // check connection, in case connect
1864         if (!Connect(system))
1865         {
1866                 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
1867                 return 0;
1868         }
1869
1870         // Query preparation
1871         TString sourceName(source);
1872         Int_t nFields = 3;
1873         TString sqlQueryStart = Form("select filePath,size,fileChecksum from %s where",
1874                                                                 fConfig->GetFXSdbTable(system));
1875         TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1876                                                                 GetCurrentRun(), detector, id);
1877
1878         if (system == kDAQ)
1879         {
1880                 whereClause += Form(" and DAQsource=\"%s\"", source);
1881         }
1882         else if (system == kDCS)
1883         {
1884                 sourceName="none";
1885         }
1886         else if (system == kHLT)
1887         {
1888                 whereClause += Form(" and DDLnumbers=\"%s\"", source);
1889                 nFields = 3;
1890         }
1891
1892         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1893
1894         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1895
1896         // Query execution
1897         TSQLResult* aResult = 0;
1898         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
1899         if (!aResult) {
1900                 Log(detector, Form("GetFileName - Can't execute SQL query to %s database for: id = %s, source = %s",
1901                                 GetSystemName(system), id, sourceName.Data()));
1902                 return 0;
1903         }
1904
1905         if(aResult->GetRowCount() == 0)
1906         {
1907                 Log(detector,
1908                         Form("GetFileName - No entry in %s FXS db for: id = %s, source = %s",
1909                                 GetSystemName(system), id, sourceName.Data()));
1910                 delete aResult;
1911                 return 0;
1912         }
1913
1914         if (aResult->GetRowCount() > 1) {
1915                 Log(detector,
1916                         Form("GetFileName - More than one entry in %s FXS db for: id = %s, source = %s",
1917                                 GetSystemName(system), id, sourceName.Data()));
1918                 delete aResult;
1919                 return 0;
1920         }
1921
1922         if (aResult->GetFieldCount() != nFields) {
1923                 Log(detector,
1924                         Form("GetFileName - Wrong field count in %s FXS db for: id = %s, source = %s",
1925                                 GetSystemName(system), id, sourceName.Data()));
1926                 delete aResult;
1927                 return 0;
1928         }
1929
1930         TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
1931
1932         if (!aRow){
1933                 Log(detector, Form("GetFileName - Empty set result in %s FXS db from query: id = %s, source = %s",
1934                                 GetSystemName(system), id, sourceName.Data()));
1935                 delete aResult;
1936                 return 0;
1937         }
1938
1939         TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
1940         TString fileSize(aRow->GetField(1), aRow->GetFieldLength(1));
1941         TString fileChecksum(aRow->GetField(2), aRow->GetFieldLength(2));
1942
1943         delete aResult;
1944         delete aRow;
1945
1946         AliDebug(2, Form("filePath = %s; size = %s, fileChecksum = %s",
1947                                 filePath.Data(), fileSize.Data(), fileChecksum.Data()));
1948
1949         // retrieved file is renamed to make it unique
1950         TString localFileName = Form("%s_%s_%d_%s_%s.shuttle",
1951                                         GetSystemName(system), detector, GetCurrentRun(), id, sourceName.Data());
1952
1953
1954         // file retrieval from FXS
1955         UInt_t nRetries = 0;
1956         UInt_t maxRetries = 3;
1957         Bool_t result = kFALSE;
1958
1959         // copy!! if successful TSystem::Exec returns 0
1960         while(nRetries++ < maxRetries) {
1961                 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
1962                 result = RetrieveFile(system, filePath.Data(), localFileName.Data());
1963                 if(!result)
1964                 {
1965                         Log(detector, Form("GetFileName - Copy of file %s from %s FXS failed",
1966                                         filePath.Data(), GetSystemName(system)));
1967                         continue;
1968                 } 
1969
1970                 if (fileChecksum.Length()>0)
1971                 {
1972                         // compare md5sum of local file with the one stored in the FXS DB
1973                         Int_t md5Comp = gSystem->Exec(Form("md5sum %s/%s |grep %s 2>&1 > /dev/null",
1974                                                 GetShuttleTempDir(), localFileName.Data(), fileChecksum.Data()));
1975
1976                         if (md5Comp != 0)
1977                         {
1978                                 Log(detector, Form("GetFileName - md5sum of file %s does not match with local copy!",
1979                                                         filePath.Data()));
1980                                 result = kFALSE;
1981                                 continue;
1982                         }
1983                 } else {
1984                         Log(fCurrentDetector, Form("GetFile - md5sum of file %s not set in %s database, skipping comparison",
1985                                                         filePath.Data(), GetSystemName(system)));
1986                 }
1987                 if (result) break;
1988         }
1989
1990         if(!result) return 0;
1991
1992         fFXSCalled[system]=kTRUE;
1993         TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, sourceName.Data()));
1994         fFXSlist[system].Add(fileParams);
1995
1996         static TString fullLocalFileName;
1997         fullLocalFileName.Form("%s/%s", GetShuttleTempDir(), localFileName.Data());
1998
1999         Log(fCurrentDetector, Form("GetFile - Retrieved file with id %s and source %s from %s to %s", id, source, GetSystemName(system), fullLocalFileName.Data()));
2000
2001         return fullLocalFileName.Data();
2002 }
2003
2004 //______________________________________________________________________________________________
2005 Bool_t AliShuttle::RetrieveFile(UInt_t system, const char* fxsFileName, const char* localFileName)
2006 {
2007         //
2008         // Copies file from FXS to local Shuttle machine
2009         //
2010
2011         // check temp directory: trying to cd to temp; if it does not exist, create it
2012         AliDebug(2, Form("Copy file %s from %s FXS into %s/%s",
2013                         GetSystemName(system), fxsFileName, GetShuttleTempDir(), localFileName));
2014
2015         void* dir = gSystem->OpenDirectory(GetShuttleTempDir());
2016         if (dir == NULL) {
2017                 if (gSystem->mkdir(GetShuttleTempDir(), kTRUE)) {
2018                         AliError(Form("Can't open directory <%s>", GetShuttleTempDir()));
2019                         return kFALSE;
2020                 }
2021
2022         } else {
2023                 gSystem->FreeDirectory(dir);
2024         }
2025
2026         TString baseFXSFolder;
2027         if (system == kDAQ)
2028         {
2029                 baseFXSFolder = "FES/";
2030         }
2031         else if (system == kDCS)
2032         {
2033                 baseFXSFolder = "";
2034         }
2035         else if (system == kHLT)
2036         {
2037                 baseFXSFolder = "/opt/FXS/";
2038         }
2039
2040
2041         TString command = Form("scp -oPort=%d -2 %s@%s:%s%s %s/%s",
2042                 fConfig->GetFXSPort(system),
2043                 fConfig->GetFXSUser(system),
2044                 fConfig->GetFXSHost(system),
2045                 baseFXSFolder.Data(),
2046                 fxsFileName,
2047                 GetShuttleTempDir(),
2048                 localFileName);
2049
2050         AliDebug(2, Form("%s",command.Data()));
2051
2052         Bool_t result = (gSystem->Exec(command.Data()) == 0);
2053
2054         return result;
2055 }
2056
2057 //______________________________________________________________________________________________
2058 TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
2059 {
2060         //
2061         // Get sources producing the condition file Id from file exchange servers
2062         // if id is NULL all sources are returned (distinct)
2063         //
2064
2065         Log(detector, Form("GetFileSources - Retrieving sources with id %s from %s", id, GetSystemName(system)));
2066         
2067         // check if test mode should simulate a FXS error
2068         if (fTestMode & kErrorFXSSources)
2069         {
2070                 Log(detector, Form("GetFileSources - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2071                 return 0;
2072         }
2073
2074         if (system == kDCS)
2075         {
2076                 AliWarning("DCS system has only one source of data!");
2077                 TList *list = new TList();
2078                 list->SetOwner(1);
2079                 list->Add(new TObjString(" "));
2080                 return list;
2081         }
2082
2083         // check connection, in case connect
2084         if (!Connect(system))
2085         {
2086                 Log(detector, Form("GetFileSources - Couldn't connect to %s FXS database", GetSystemName(system)));
2087                 return NULL;
2088         }
2089
2090         TString sourceName = 0;
2091         if (system == kDAQ)
2092         {
2093                 sourceName = "DAQsource";
2094         } else if (system == kHLT)
2095         {
2096                 sourceName = "DDLnumbers";
2097         }
2098
2099         TString sqlQueryStart = Form("select distinct %s from %s where", sourceName.Data(), fConfig->GetFXSdbTable(system));
2100         TString whereClause = Form("run=%d and detector=\"%s\"",
2101                                 GetCurrentRun(), detector);
2102         if (id)
2103                 whereClause += Form(" and fileId=\"%s\"", id);
2104         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2105
2106         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2107
2108         // Query execution
2109         TSQLResult* aResult;
2110         aResult = fServer[system]->Query(sqlQuery);
2111         if (!aResult) {
2112                 Log(detector, Form("GetFileSources - Can't execute SQL query to %s database for id: %s",
2113                                 GetSystemName(system), id));
2114                 return 0;
2115         }
2116
2117         TList *list = new TList();
2118         list->SetOwner(1);
2119         
2120         if (aResult->GetRowCount() == 0)
2121         {
2122                 Log(detector,
2123                         Form("GetFileSources - No entry in %s FXS table for id: %s", GetSystemName(system), id));
2124                 delete aResult;
2125                 return list;
2126         }
2127
2128         Log(detector, Form("GetFileSources - Found %d sources", aResult->GetRowCount()));
2129
2130         TSQLRow* aRow;
2131         while ((aRow = aResult->Next()))
2132         {
2133
2134                 TString source(aRow->GetField(0), aRow->GetFieldLength(0));
2135                 AliDebug(2, Form("%s = %s", sourceName.Data(), source.Data()));
2136                 list->Add(new TObjString(source));
2137                 delete aRow;
2138         }
2139
2140         delete aResult;
2141
2142         return list;
2143 }
2144
2145 //______________________________________________________________________________________________
2146 TList* AliShuttle::GetFileIDs(Int_t system, const char* detector, const char* source)
2147 {
2148         //
2149         // Get all ids of condition files produced by a given source from file exchange servers
2150         //
2151         
2152         Log(detector, Form("GetFileIDs - Retrieving ids with source %s with %s", source, GetSystemName(system)));
2153
2154         // check if test mode should simulate a FXS error
2155         if (fTestMode & kErrorFXSSources)
2156         {
2157                 Log(detector, Form("GetFileIDs - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2158                 return 0;
2159         }
2160
2161         // check connection, in case connect
2162         if (!Connect(system))
2163         {
2164                 Log(detector, Form("GetFileIDs - Couldn't connect to %s FXS database", GetSystemName(system)));
2165                 return NULL;
2166         }
2167
2168         TString sourceName = 0;
2169         if (system == kDAQ)
2170         {
2171                 sourceName = "DAQsource";
2172         } else if (system == kHLT)
2173         {
2174                 sourceName = "DDLnumbers";
2175         }
2176
2177         TString sqlQueryStart = Form("select fileId from %s where", fConfig->GetFXSdbTable(system));
2178         TString whereClause = Form("run=%d and detector=\"%s\"",
2179                                 GetCurrentRun(), detector);
2180         if (sourceName.Length() > 0 && source)
2181                 whereClause += Form(" and %s=\"%s\"", sourceName.Data(), source);
2182         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2183
2184         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2185
2186         // Query execution
2187         TSQLResult* aResult;
2188         aResult = fServer[system]->Query(sqlQuery);
2189         if (!aResult) {
2190                 Log(detector, Form("GetFileIDs - Can't execute SQL query to %s database for source: %s",
2191                                 GetSystemName(system), source));
2192                 return 0;
2193         }
2194
2195         TList *list = new TList();
2196         list->SetOwner(1);
2197         
2198         if (aResult->GetRowCount() == 0)
2199         {
2200                 Log(detector,
2201                         Form("GetFileIDs - No entry in %s FXS table for source: %s", GetSystemName(system), source));
2202                 delete aResult;
2203                 return list;
2204         }
2205
2206         Log(detector, Form("GetFileIDs - Found %d ids", aResult->GetRowCount()));
2207
2208         TSQLRow* aRow;
2209
2210         while ((aRow = aResult->Next()))
2211         {
2212
2213                 TString id(aRow->GetField(0), aRow->GetFieldLength(0));
2214                 AliDebug(2, Form("fileId = %s", id.Data()));
2215                 list->Add(new TObjString(id));
2216                 delete aRow;
2217         }
2218
2219         delete aResult;
2220
2221         return list;
2222 }
2223
2224 //______________________________________________________________________________________________
2225 Bool_t AliShuttle::Connect(Int_t system)
2226 {
2227         // Connect to MySQL Server of the system's FXS MySQL databases
2228         // DAQ Logbook, Shuttle Logbook and DAQ FXS db are on the same host
2229         //
2230
2231         // check connection: if already connected return
2232         if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
2233
2234         TString dbHost, dbUser, dbPass, dbName;
2235
2236         if (system < 3) // FXS db servers
2237         {
2238                 dbHost = Form("mysql://%s:%d", fConfig->GetFXSdbHost(system), fConfig->GetFXSdbPort(system));
2239                 dbUser = fConfig->GetFXSdbUser(system);
2240                 dbPass = fConfig->GetFXSdbPass(system);
2241                 dbName =   fConfig->GetFXSdbName(system);
2242         } else { // Run & Shuttle logbook servers
2243         // TODO Will the Shuttle logbook server be the same as the Run logbook server ???
2244                 dbHost = Form("mysql://%s:%d", fConfig->GetDAQlbHost(), fConfig->GetDAQlbPort());
2245                 dbUser = fConfig->GetDAQlbUser();
2246                 dbPass = fConfig->GetDAQlbPass();
2247                 dbName =   fConfig->GetDAQlbDB();
2248         }
2249
2250         fServer[system] = TSQLServer::Connect(dbHost.Data(), dbUser.Data(), dbPass.Data());
2251         if (!fServer[system] || !fServer[system]->IsConnected()) {
2252                 if(system < 3)
2253                 {
2254                 AliError(Form("Can't establish connection to FXS database for %s",
2255                                         AliShuttleInterface::GetSystemName(system)));
2256                 } else {
2257                 AliError("Can't establish connection to Run logbook.");
2258                 }
2259                 if(fServer[system]) delete fServer[system];
2260                 return kFALSE;
2261         }
2262
2263         // Get tables
2264         TSQLResult* aResult=0;
2265         switch(system){
2266                 case kDAQ:
2267                         aResult = fServer[kDAQ]->GetTables(dbName.Data());
2268                         break;
2269                 case kDCS:
2270                         aResult = fServer[kDCS]->GetTables(dbName.Data());
2271                         break;
2272                 case kHLT:
2273                         aResult = fServer[kHLT]->GetTables(dbName.Data());
2274                         break;
2275                 default:
2276                         aResult = fServer[3]->GetTables(dbName.Data());
2277                         break;
2278         }
2279
2280         delete aResult;
2281         return kTRUE;
2282 }
2283
2284 //______________________________________________________________________________________________
2285 Bool_t AliShuttle::UpdateTable()
2286 {
2287         //
2288         // Update FXS table filling time_processed field in all rows corresponding to current run and detector
2289         //
2290
2291         Bool_t result = kTRUE;
2292
2293         for (UInt_t system=0; system<3; system++)
2294         {
2295                 if(!fFXSCalled[system]) continue;
2296
2297                 // check connection, in case connect
2298                 if (!Connect(system))
2299                 {
2300                         Log(fCurrentDetector, Form("UpdateTable - Couldn't connect to %s FXS database", GetSystemName(system)));
2301                         result = kFALSE;
2302                         continue;
2303                 }
2304
2305                 TTimeStamp now; // now
2306
2307                 // Loop on FXS list entries
2308                 TIter iter(&fFXSlist[system]);
2309                 TObjString *aFXSentry=0;
2310                 while ((aFXSentry = dynamic_cast<TObjString*> (iter.Next())))
2311                 {
2312                         TString aFXSentrystr = aFXSentry->String();
2313                         TObjArray *aFXSarray = aFXSentrystr.Tokenize("#!?!#");
2314                         if (!aFXSarray || aFXSarray->GetEntries() != 2 )
2315                         {
2316                                 Log(fCurrentDetector, Form("UpdateTable - error updating %s FXS entry. Check string: <%s>",
2317                                         GetSystemName(system), aFXSentrystr.Data()));
2318                                 if(aFXSarray) delete aFXSarray;
2319                                 result = kFALSE;
2320                                 continue;
2321                         }
2322                         const char* fileId = ((TObjString*) aFXSarray->At(0))->GetName();
2323                         const char* source = ((TObjString*) aFXSarray->At(1))->GetName();
2324
2325                         TString whereClause;
2326                         if (system == kDAQ)
2327                         {
2328                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
2329                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
2330                         }
2331                         else if (system == kDCS)
2332                         {
2333                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\";",
2334                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId);
2335                         }
2336                         else if (system == kHLT)
2337                         {
2338                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\";",
2339                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
2340                         }
2341
2342                         delete aFXSarray;
2343
2344                         TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
2345                                                                 now.GetSec(), whereClause.Data());
2346
2347                         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2348
2349                         // Query execution
2350                         TSQLResult* aResult;
2351                         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2352                         if (!aResult)
2353                         {
2354                                 Log(fCurrentDetector, Form("UpdateTable - %s db: can't execute SQL query <%s>",
2355                                                                 GetSystemName(system), sqlQuery.Data()));
2356                                 result = kFALSE;
2357                                 continue;
2358                         }
2359                         delete aResult;
2360                 }
2361         }
2362
2363         return result;
2364 }
2365
2366 //______________________________________________________________________________________________
2367 Bool_t AliShuttle::UpdateTableFailCase()
2368 {
2369         // Update FXS table filling time_processed field in all rows corresponding to current run and detector
2370         // this is called in case the preprocessor is declared failed for the current run, because
2371         // the fields are updated only in case of success
2372
2373         Bool_t result = kTRUE;
2374
2375         for (UInt_t system=0; system<3; system++)
2376         {
2377                 // check connection, in case connect
2378                 if (!Connect(system))
2379                 {
2380                         Log(fCurrentDetector, Form("UpdateTableFailCase - Couldn't connect to %s FXS database",
2381                                                         GetSystemName(system)));
2382                         result = kFALSE;
2383                         continue;
2384                 }
2385
2386                 TTimeStamp now; // now
2387
2388                 // Loop on FXS list entries
2389
2390                 TString whereClause = Form("where run=%d and detector=\"%s\";",
2391                                                 GetCurrentRun(), fCurrentDetector.Data());
2392
2393
2394                 TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
2395                                                         now.GetSec(), whereClause.Data());
2396
2397                 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2398
2399                 // Query execution
2400                 TSQLResult* aResult;
2401                 aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2402                 if (!aResult)
2403                 {
2404                         Log(fCurrentDetector, Form("UpdateTableFailCase - %s db: can't execute SQL query <%s>",
2405                                                         GetSystemName(system), sqlQuery.Data()));
2406                         result = kFALSE;
2407                         continue;
2408                 }
2409                 delete aResult;
2410         }
2411
2412         return result;
2413 }
2414
2415 //______________________________________________________________________________________________
2416 Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
2417 {
2418         //
2419         // Update Shuttle logbook filling detector or shuttle_done column
2420         // ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
2421         //
2422
2423         // check connection, in case connect
2424         if(!Connect(3)){
2425                 Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook.");
2426                 return kFALSE;
2427         }
2428
2429         TString detName(detector);
2430         TString setClause;
2431         if(detName == "shuttle_done")
2432         {
2433                 setClause = "set shuttle_done=1";
2434
2435                 // Send the information to ML
2436                 TMonaLisaText  mlStatus("SHUTTLE_status", "Done");
2437
2438                 TList mlList;
2439                 mlList.Add(&mlStatus);
2440
2441                 fMonaLisa->SendParameters(&mlList);
2442         } else {
2443                 TString statusStr(status);
2444                 if(statusStr.Contains("done", TString::kIgnoreCase) ||
2445                    statusStr.Contains("failed", TString::kIgnoreCase)){
2446                         setClause = Form("set %s=\"%s\"", detector, status);
2447                 } else {
2448                         Log("SHUTTLE",
2449                                 Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s",
2450                                         status, detector));
2451                         return kFALSE;
2452                 }
2453         }
2454
2455         TString whereClause = Form("where run=%d", GetCurrentRun());
2456
2457         TString sqlQuery = Form("update %s %s %s",
2458                                         fConfig->GetShuttlelbTable(), setClause.Data(), whereClause.Data());
2459
2460         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2461
2462         // Query execution
2463         TSQLResult* aResult;
2464         aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
2465         if (!aResult) {
2466                 Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data()));
2467                 return kFALSE;
2468         }
2469         delete aResult;
2470
2471         return kTRUE;
2472 }
2473
2474 //______________________________________________________________________________________________
2475 Int_t AliShuttle::GetCurrentRun() const
2476 {
2477         //
2478         // Get current run from logbook entry
2479         //
2480
2481         return fLogbookEntry ? fLogbookEntry->GetRun() : -1;
2482 }
2483
2484 //______________________________________________________________________________________________
2485 UInt_t AliShuttle::GetCurrentStartTime() const
2486 {
2487         //
2488         // get current start time
2489         //
2490
2491         return fLogbookEntry ? fLogbookEntry->GetStartTime() : 0;
2492 }
2493
2494 //______________________________________________________________________________________________
2495 UInt_t AliShuttle::GetCurrentEndTime() const
2496 {
2497         //
2498         // get current end time from logbook entry
2499         //
2500
2501         return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
2502 }
2503
2504 //______________________________________________________________________________________________
2505 void AliShuttle::Log(const char* detector, const char* message)
2506 {
2507         //
2508         // Fill log string with a message
2509         //
2510
2511         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
2512         if (dir == NULL) {
2513                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE)) {
2514                         AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
2515                         return;
2516                 }
2517
2518         } else {
2519                 gSystem->FreeDirectory(dir);
2520         }
2521
2522         TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
2523         if (GetCurrentRun() >= 0) 
2524                 toLog += Form("run %d - ", GetCurrentRun());
2525         toLog += Form("%s", message);
2526
2527         AliInfo(toLog.Data());
2528         
2529         // if we redirect the log output already to the file, leave here
2530         if (fOutputRedirected && strcmp(detector, "SHUTTLE") != 0)
2531                 return;
2532
2533         TString fileName = GetLogFileName(detector);
2534         
2535         gSystem->ExpandPathName(fileName);
2536
2537         ofstream logFile;
2538         logFile.open(fileName, ofstream::out | ofstream::app);
2539
2540         if (!logFile.is_open()) {
2541                 AliError(Form("Could not open file %s", fileName.Data()));
2542                 return;
2543         }
2544
2545         logFile << toLog.Data() << "\n";
2546
2547         logFile.close();
2548 }
2549
2550 //______________________________________________________________________________________________
2551 TString AliShuttle::GetLogFileName(const char* detector) const
2552 {
2553         // 
2554         // returns the name of the log file for a given sub detector
2555         //
2556         
2557         TString fileName;
2558         
2559         if (GetCurrentRun() >= 0) 
2560                 fileName.Form("%s/%s_%d.log", GetShuttleLogDir(), detector, GetCurrentRun());
2561         else
2562                 fileName.Form("%s/%s.log", GetShuttleLogDir(), detector);
2563
2564         return fileName;
2565 }
2566
2567 //______________________________________________________________________________________________
2568 Bool_t AliShuttle::Collect(Int_t run)
2569 {
2570         //
2571         // Collects conditions data for all UNPROCESSED run written to DAQ LogBook in case of run = -1 (default)
2572         // If a dedicated run is given this run is processed
2573         //
2574         // In operational mode, this is the Shuttle function triggered by the EOR signal.
2575         //
2576
2577         if (run == -1)
2578                 Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs");
2579         else
2580                 Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run));
2581
2582         SetLastAction("Starting");
2583
2584         TString whereClause("where shuttle_done=0");
2585         if (run != -1)
2586                 whereClause += Form(" and run=%d", run);
2587
2588         TObjArray shuttleLogbookEntries;
2589         if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries))
2590         {
2591                 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
2592                 return kFALSE;
2593         }
2594
2595         if (shuttleLogbookEntries.GetEntries() == 0)
2596         {
2597                 if (run == -1)
2598                         Log("SHUTTLE","Collect - Found no UNPROCESSED runs in Shuttle logbook");
2599                 else
2600                         Log("SHUTTLE", Form("Collect - Run %d is already DONE "
2601                                                 "or it does not exist in Shuttle logbook", run));
2602                 return kTRUE;
2603         }
2604
2605         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
2606                 fFirstUnprocessed[iDet] = kTRUE;
2607
2608         if (run != -1)
2609         {
2610                 // query Shuttle logbook for earlier runs, check if some detectors are unprocessed,
2611                 // flag them into fFirstUnprocessed array
2612                 TString whereClause(Form("where shuttle_done=0 and run < %d", run));
2613                 TObjArray tmpLogbookEntries;
2614                 if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries))
2615                 {
2616                         Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
2617                         return kFALSE;
2618                 }
2619
2620                 TIter iter(&tmpLogbookEntries);
2621                 AliShuttleLogbookEntry* anEntry = 0;
2622                 while ((anEntry = dynamic_cast<AliShuttleLogbookEntry*> (iter.Next())))
2623                 {
2624                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
2625                         {
2626                                 if (anEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
2627                                 {
2628                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
2629                                                         anEntry->GetRun(), GetDetName(iDet)));
2630                                         fFirstUnprocessed[iDet] = kFALSE;
2631                                 }
2632                         }
2633
2634                 }
2635
2636         }
2637
2638         if (!RetrieveConditionsData(shuttleLogbookEntries))
2639         {
2640                 Log("SHUTTLE", "Collect - Process of at least one run failed");
2641                 return kFALSE;
2642         }
2643
2644         Log("SHUTTLE", "Collect - Requested run(s) successfully processed");
2645         return kTRUE;
2646 }
2647
2648 //______________________________________________________________________________________________
2649 Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries)
2650 {
2651         //
2652         // Retrieve conditions data for all runs that aren't processed yet
2653         //
2654
2655         Bool_t hasError = kFALSE;
2656
2657         TIter iter(&dateEntries);
2658         AliShuttleLogbookEntry* anEntry;
2659
2660         while ((anEntry = (AliShuttleLogbookEntry*) iter.Next())){
2661                 if (!Process(anEntry)){
2662                         hasError = kTRUE;
2663                 }
2664
2665                 // clean SHUTTLE temp directory
2666                 TString filename = Form("%s/*.shuttle", GetShuttleTempDir());
2667                 RemoveFile(filename.Data());
2668         }
2669
2670         return hasError == kFALSE;
2671 }
2672
2673 //______________________________________________________________________________________________
2674 ULong_t AliShuttle::GetTimeOfLastAction() const
2675 {
2676         //
2677         // Gets time of last action
2678         //
2679
2680         ULong_t tmp;
2681
2682         fMonitoringMutex->Lock();
2683
2684         tmp = fLastActionTime;
2685
2686         fMonitoringMutex->UnLock();
2687
2688         return tmp;
2689 }
2690
2691 //______________________________________________________________________________________________
2692 const TString AliShuttle::GetLastAction() const
2693 {
2694         //
2695         // returns a string description of the last action
2696         //
2697
2698         TString tmp;
2699
2700         fMonitoringMutex->Lock();
2701         
2702         tmp = fLastAction;
2703         
2704         fMonitoringMutex->UnLock();
2705
2706         return tmp;
2707 }
2708
2709 //______________________________________________________________________________________________
2710 void AliShuttle::SetLastAction(const char* action)
2711 {
2712         //
2713         // updates the monitoring variables
2714         //
2715
2716         fMonitoringMutex->Lock();
2717
2718         fLastAction = action;
2719         fLastActionTime = time(0);
2720         
2721         fMonitoringMutex->UnLock();
2722 }
2723
2724 //______________________________________________________________________________________________
2725 const char* AliShuttle::GetRunParameter(const char* param)
2726 {
2727         //
2728         // returns run parameter read from DAQ logbook
2729         //
2730
2731         if(!fLogbookEntry) {
2732                 AliError("No logbook entry!");
2733                 return 0;
2734         }
2735
2736         return fLogbookEntry->GetRunParameter(param);
2737 }
2738
2739 //______________________________________________________________________________________________
2740 AliCDBEntry* AliShuttle::GetFromOCDB(const char* detector, const AliCDBPath& path)
2741 {
2742         //
2743         // returns object from OCDB valid for current run
2744         //
2745
2746         if (fTestMode & kErrorOCDB)
2747         {
2748                 Log(detector, "GetFromOCDB - In TESTMODE - Simulating error with OCDB");
2749                 return 0;
2750         }
2751         
2752         AliCDBStorage *sto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
2753         if (!sto)
2754         {
2755                 Log(detector, "GetFromOCDB - Cannot activate main OCDB for query!");
2756                 return 0;
2757         }
2758
2759         return dynamic_cast<AliCDBEntry*> (sto->Get(path, GetCurrentRun()));
2760 }
2761
2762 //______________________________________________________________________________________________
2763 Bool_t AliShuttle::SendMail()
2764 {
2765         //
2766         // sends a mail to the subdetector expert in case of preprocessor error
2767         //
2768         
2769         if (fTestMode != kNone)
2770                 return kTRUE;
2771
2772         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
2773         if (dir == NULL)
2774         {
2775                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
2776                 {
2777                         AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
2778                         return kFALSE;
2779                 }
2780
2781         } else {
2782                 gSystem->FreeDirectory(dir);
2783         }
2784
2785         TString bodyFileName;
2786         bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
2787         gSystem->ExpandPathName(bodyFileName);
2788
2789         ofstream mailBody;
2790         mailBody.open(bodyFileName, ofstream::out);
2791
2792         if (!mailBody.is_open())
2793         {
2794                 AliError(Form("Could not open mail body file %s", bodyFileName.Data()));
2795                 return kFALSE;
2796         }
2797
2798         TString to="";
2799         TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
2800         TObjString *anExpert=0;
2801         while ((anExpert = (TObjString*) iterExperts.Next()))
2802         {
2803                 to += Form("%s,", anExpert->GetName());
2804         }
2805         to.Remove(to.Length()-1);
2806         AliDebug(2, Form("to: %s",to.Data()));
2807
2808         if (to.IsNull()) {
2809                 AliInfo("List of detector responsibles not yet set!");
2810                 return kFALSE;
2811         }
2812
2813         TString cc="alberto.colla@cern.ch";
2814
2815         TString subject = Form("%s Shuttle preprocessor FAILED in run %d !",
2816                                 fCurrentDetector.Data(), GetCurrentRun());
2817         AliDebug(2, Form("subject: %s", subject.Data()));
2818
2819         TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
2820         body += Form("SHUTTLE just detected that your preprocessor "
2821                         "failed processing run %d!!\n\n", GetCurrentRun());
2822         body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n", fCurrentDetector.Data());
2823         body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
2824         body += Form("Find the %s log for the current run on \n\n"
2825                 "\thttp://pcalishuttle01.cern.ch:8880/logs/%s_%d.log \n\n", 
2826                 fCurrentDetector.Data(), fCurrentDetector.Data(), GetCurrentRun());
2827         body += Form("The last 10 lines of %s log file are following:\n\n");
2828
2829         AliDebug(2, Form("Body begin: %s", body.Data()));
2830
2831         mailBody << body.Data();
2832         mailBody.close();
2833         mailBody.open(bodyFileName, ofstream::out | ofstream::app);
2834
2835         TString logFileName = Form("%s/%s_%d.log", GetShuttleLogDir(), fCurrentDetector.Data(), GetCurrentRun());
2836         TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
2837         if (gSystem->Exec(tailCommand.Data()))
2838         {
2839                 mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
2840         }
2841
2842         TString endBody = Form("------------------------------------------------------\n\n");
2843         endBody += Form("In case of problems please contact the SHUTTLE core team.\n\n");
2844         endBody += "Please do not answer this message directly, it is automatically generated.\n\n";
2845         endBody += "Greetings,\n\n \t\t\tthe SHUTTLE\n";
2846
2847         AliDebug(2, Form("Body end: %s", endBody.Data()));
2848
2849         mailBody << endBody.Data();
2850
2851         mailBody.close();
2852
2853         // send mail!
2854         TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
2855                                                 subject.Data(),
2856                                                 cc.Data(),
2857                                                 to.Data(),
2858                                                 bodyFileName.Data());
2859         AliDebug(2, Form("mail command: %s", mailCommand.Data()));
2860
2861         Bool_t result = gSystem->Exec(mailCommand.Data());
2862
2863         return result == 0;
2864 }
2865
2866 //______________________________________________________________________________________________
2867 const char* AliShuttle::GetRunType()
2868 {
2869         //
2870         // returns run type read from "run type" logbook
2871         //
2872
2873         if(!fLogbookEntry) {
2874                 AliError("No logbook entry!");
2875                 return 0;
2876         }
2877
2878         return fLogbookEntry->GetRunType();
2879 }
2880
2881 //______________________________________________________________________________________________
2882 Bool_t AliShuttle::GetHLTStatus()
2883 {
2884         // Return HLT status (ON=1 OFF=0)
2885         // Converts the HLT status from the status string read in the run logbook (not just a bool)
2886
2887         if(!fLogbookEntry) {
2888                 AliError("No logbook entry!");
2889                 return 0;
2890         }
2891
2892         // TODO implement when HLTStatus is inserted in run logbook
2893         //TString hltStatus = fLogbookEntry->GetRunParameter("HLTStatus");
2894         //if(hltStatus == "OFF") {return kFALSE};
2895
2896         return kTRUE;
2897 }
2898
2899 //______________________________________________________________________________________________
2900 void AliShuttle::SetShuttleTempDir(const char* tmpDir)
2901 {
2902         //
2903         // sets Shuttle temp directory
2904         //
2905
2906         fgkShuttleTempDir = gSystem->ExpandPathName(tmpDir);
2907 }
2908
2909 //______________________________________________________________________________________________
2910 void AliShuttle::SetShuttleLogDir(const char* logDir)
2911 {
2912         //
2913         // sets Shuttle log directory
2914         //
2915
2916         fgkShuttleLogDir = gSystem->ExpandPathName(logDir);
2917 }