5a064fe971f1cbe5cec781742413f7d27f91d4ad
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttle.cxx
1 /**************************************************************************
2  * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
3  *                                                                        *
4  * Author: The ALICE Off-line Project.                                    *
5  * Contributors are mentioned in the code where appropriate.              *
6  *                                                                        *
7  * Permission to use, copy, modify and distribute this software and its   *
8  * documentation strictly for non-commercial purposes is hereby granted   *
9  * without fee, provided that the above copyright notice appears in all   *
10  * copies and that both the copyright notice and this permission notice   *
11  * appear in the supporting documentation. The authors make no claims     *
12  * about the suitability of this software for any purpose. It is          *
13  * provided "as is" without express or implied warranty.                  *
14  **************************************************************************/
15
16 /*
17 $Log$
18 Revision 1.71  2007/12/12 14:56:14  jgrosseo
19 sending shuttle_ignore to ML also in case of 0 events
20
21 Revision 1.70  2007/12/12 13:45:35  acolla
22 Monalisa started in Collect() function. Alive message to monitor is sent at each Collect and every minute during preprocessor processing.
23
24 Revision 1.69  2007/12/12 10:06:29  acolla
25 in AliShuttle.cxx: SHUTTLE logbook is updated in case of invalid run times:
26
27 time_start==0 && time_end==0
28
29 logbook is NOT updated if time_start != 0 && time_end == 0, because it may mean that the run is still ongoing.
30
31 Revision 1.68  2007/12/11 10:15:17  acolla
32 Added marking SHUTTLE=DONE for invalid runs
33 (invalid start time or end time) and runs with totalEvents < 1
34
35 Revision 1.67  2007/12/07 19:14:36  acolla
36 in AliShuttleTrigger:
37
38 Added automatic collection of new runs on a regular time basis (settable from the configuration)
39
40 in AliShuttleConfig: new members
41
42 - triggerWait: time to wait for DIM trigger (s) before starting automatic collection of new runs
43 - mode: run mode (test, prod) -> used to build log folder (logs or logs_PROD)
44
45 in AliShuttle:
46
47 - logs now stored in logs/#RUN/DET_#RUN.log
48
49 Revision 1.66  2007/12/05 10:45:19  jgrosseo
50 changed order of arguments to TMonaLisaWriter
51
52 Revision 1.65  2007/11/26 16:58:37  acolla
53 Monalisa configuration added: host and table name
54
55 Revision 1.64  2007/11/13 16:15:47  acolla
56 DCS map is stored in a file in the temp folder where the detector is processed.
57 If the preprocessor fails, the temp folder is not removed. This will help the debugging of the problem.
58
59 Revision 1.63  2007/11/02 10:53:16  acolla
60 Protection added to AliShuttle::CopyFileLocally
61
62 Revision 1.62  2007/10/31 18:23:13  acolla
63 Furter developement on the Shuttle:
64
65 - Shuttle now connects to the Grid as alidaq. The OCDB and Reference folders
66 are now built from /alice/data, e.g.:
67 /alice/data/2007/LHC07a/OCDB
68
69 the year and LHC period are taken from the Shuttle.
70 Raw metadata files are stored by GRP to:
71 /alice/data/2007/LHC07a/<runNb>/Raw/RunMetadata.root
72
73 - Shuttle sends a mail to DCS experts each time DP retrieval fails.
74
75 Revision 1.61  2007/10/30 20:33:51  acolla
76 Improved managing of temporary folders, which weren't correctly handled.
77 Resolved bug introduced in StoreReferenceFile, which caused SPD preprocessor fail.
78
79 Revision 1.60  2007/10/29 18:06:16  acolla
80
81 New function StoreRunMetadataFile added to preprocessor and Shuttle interface
82 This function can be used by GRP only. It stores raw data tags merged file to the
83 raw data folder (e.g. /alice/data/2008/LHC08a/000099999/Raw).
84
85 KNOWN ISSUES:
86
87 1. Shuttle cannot write to /alice/data/ because it belongs to alidaq. Tag file is stored in /alice/simulation/... for the time being.
88 2. Due to a bug in TAlien::Mkdir, the creation of a folder in recursive mode (-p option) does not work. The problem
89 has been corrected in the root package on the Shuttle machine.
90
91 Revision 1.59  2007/10/05 12:40:55  acolla
92
93 Result error code added to AliDCSClient data members (it was "lost" with the new implementation of TMap* GetAliasValues and GetDPValues).
94
95 Revision 1.58  2007/09/28 15:27:40  acolla
96
97 AliDCSClient "multiSplit" option added in the DCS configuration
98 in AliDCSMessage: variable MAX_BODY_SIZE set to 500000
99
100 Revision 1.57  2007/09/27 16:53:13  acolla
101 Detectors can have more than one AMANDA server. SHUTTLE queries the servers sequentially,
102 merges the dcs aliases/DPs in one TMap and sends it to the preprocessor.
103
104 Revision 1.56  2007/09/14 16:46:14  jgrosseo
105 1) Connect and Close are called before and after each query, so one can
106 keep the same AliDCSClient object.
107 2) The splitting of a query is moved to GetDPValues/GetAliasValues.
108 3) Splitting interval can be specified in constructor
109
110 Revision 1.55  2007/08/06 12:26:40  acolla
111 Function Bool_t GetHLTStatus added to preprocessor. It returns the status of HLT
112 read from the run logbook.
113
114 Revision 1.54  2007/07/12 09:51:25  jgrosseo
115 removed duplicated log message in GetFile
116
117 Revision 1.53  2007/07/12 09:26:28  jgrosseo
118 updating hlt fxs base path
119
120 Revision 1.52  2007/07/12 08:06:45  jgrosseo
121 adding log messages in getfile... functions
122 adding not implemented copy constructor in alishuttleconfigholder
123
124 Revision 1.51  2007/07/03 17:24:52  acolla
125 root moved to v5-16-00. TFileMerger->Cp moved to TFile::Cp.
126
127 Revision 1.50  2007/07/02 17:19:32  acolla
128 preprocessor is run in a temp directory that is removed when process is finished.
129
130 Revision 1.49  2007/06/29 10:45:06  acolla
131 Number of columns in MySql Shuttle logbook increased by one (HLT added)
132
133 Revision 1.48  2007/06/21 13:06:19  acolla
134 GetFileSources returns dummy list with 1 source if system=DCS (better than
135 returning error as it was)
136
137 Revision 1.47  2007/06/19 17:28:56  acolla
138 HLT updated; missing map bug removed.
139
140 Revision 1.46  2007/06/09 13:01:09  jgrosseo
141 Switching to retrieval of several DCS DPs at a time (multiDPrequest)
142
143 Revision 1.45  2007/05/30 06:35:20  jgrosseo
144 Adding functionality to the Shuttle/TestShuttle:
145 o) Function to retrieve list of sources from a given system (GetFileSources with id=0)
146 o) Function to retrieve list of IDs for a given source      (GetFileIDs)
147 These functions are needed for dealing with the tag files that are saved for the GRP preprocessor
148 Example code has been added to the TestProcessor in TestShuttle
149
150 Revision 1.44  2007/05/11 16:09:32  acolla
151 Reference files for ITS, MUON and PHOS are now stored in OfflineDetName/OnlineDetName/run_...
152 example: ITS/SPD/100_filename.root
153
154 Revision 1.43  2007/05/10 09:59:51  acolla
155 Various bug fixes in StoreRefFilesToGrid; Cleaning of reference storage before processing detector (CleanReferenceStorage)
156
157 Revision 1.42  2007/05/03 08:01:39  jgrosseo
158 typo in last commit :-(
159
160 Revision 1.41  2007/05/03 08:00:48  jgrosseo
161 fixing log message when pp want to skip dcs value retrieval
162
163 Revision 1.40  2007/04/27 07:06:48  jgrosseo
164 GetFileSources returns empty list in case of no files, but successful query
165 No mails sent in testmode
166
167 Revision 1.39  2007/04/17 12:43:57  acolla
168 Correction in StoreOCDB; change of text in mail to detector expert
169
170 Revision 1.38  2007/04/12 08:26:18  jgrosseo
171 updated comment
172
173 Revision 1.37  2007/04/10 16:53:14  jgrosseo
174 redirecting sub detector stdout, stderr to sub detector log file
175
176 Revision 1.35  2007/04/04 16:26:38  acolla
177 1. Re-organization of function calls in TestPreprocessor to make it more meaningful.
178 2. Added missing dependency in test preprocessors.
179 3. in AliShuttle.cxx: processing time and memory consumption info on a single line.
180
181 Revision 1.34  2007/04/04 10:33:36  jgrosseo
182 1) Storing of files to the Grid is now done _after_ your preprocessors succeeded. This is transparent, which means that you can still use the same functions (Store, StoreReferenceData) to store files to the Grid. However, the Shuttle first stores them locally and transfers them after the preprocessor finished. The return code of these two functions has changed from UInt_t to Bool_t which gives you the success of the storing.
183 In case of an error with the Grid, the Shuttle will retry the storing later, the preprocessor does not need to be run again.
184
185 2) The meaning of the return code of the preprocessor has changed. 0 is now success and any other value means failure. This value is stored in the log and you can use it to keep details about the error condition.
186
187 3) New function StoreReferenceFile to _directly_ store a file (without opening it) to the reference storage.
188
189 4) The memory usage of the preprocessor is monitored. If it exceeds 2 GB it is terminated.
190
191 5) New function AliPreprocessor::ProcessDCS(). If you do not need to have DCS data in all cases, you can skip the processing by implemting this function and returning kFALSE under certain conditions. E.g. if there is a certain run type.
192 If you always need DCS data (like before), you do not need to implement it.
193
194 6) The run type has been added to the monitoring page
195
196 Revision 1.33  2007/04/03 13:56:01  acolla
197 Grid Storage at the end of preprocessing. Added virtual method to disable DCS query according to the
198 run type.
199
200 Revision 1.32  2007/02/28 10:41:56  acolla
201 Run type field added in SHUTTLE framework. Run type is read from "run type" logbook and retrieved by
202 AliPreprocessor::GetRunType() function.
203 Added some ldap definition files.
204
205 Revision 1.30  2007/02/13 11:23:21  acolla
206 Moved getters and setters of Shuttle's main OCDB/Reference, local
207 OCDB/Reference, temp and log folders to AliShuttleInterface
208
209 Revision 1.27  2007/01/30 17:52:42  jgrosseo
210 adding monalisa monitoring
211
212 Revision 1.26  2007/01/23 19:20:03  acolla
213 Removed old ldif files, added TOF, MCH ldif files. Added some options in
214 AliShuttleConfig::Print. Added in Ali Shuttle: SetShuttleTempDir and
215 SetShuttleLogDir
216
217 Revision 1.25  2007/01/15 19:13:52  acolla
218 Moved some AliInfo to AliDebug in SendMail function
219
220 Revision 1.21  2006/12/07 08:51:26  jgrosseo
221 update (alberto):
222 table, db names in ldap configuration
223 added GRP preprocessor
224 DCS data can also be retrieved by data point
225
226 Revision 1.20  2006/11/16 16:16:48  jgrosseo
227 introducing strict run ordering flag
228 removed giving preprocessor name to preprocessor, they have to know their name themselves ;-)
229
230 Revision 1.19  2006/11/06 14:23:04  jgrosseo
231 major update (Alberto)
232 o) reading of run parameters from the logbook
233 o) online offline naming conversion
234 o) standalone DCSclient package
235
236 Revision 1.18  2006/10/20 15:22:59  jgrosseo
237 o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
238 o) Merging Collect, CollectAll, CollectNew function
239 o) Removing implementation of empty copy constructors (declaration still there!)
240
241 Revision 1.17  2006/10/05 16:20:55  jgrosseo
242 adapting to new CDB classes
243
244 Revision 1.16  2006/10/05 15:46:26  jgrosseo
245 applying to the new interface
246
247 Revision 1.15  2006/10/02 16:38:39  jgrosseo
248 update (alberto):
249 fixed memory leaks
250 storing of objects that failed to be stored to the grid before
251 interfacing of shuttle status table in daq system
252
253 Revision 1.14  2006/08/29 09:16:05  jgrosseo
254 small update
255
256 Revision 1.13  2006/08/15 10:50:00  jgrosseo
257 effc++ corrections (alberto)
258
259 Revision 1.12  2006/08/08 14:19:29  jgrosseo
260 Update to shuttle classes (Alberto)
261
262 - Possibility to set the full object's path in the Preprocessor's and
263 Shuttle's  Store functions
264 - Possibility to extend the object's run validity in the same classes
265 ("startValidity" and "validityInfinite" parameters)
266 - Implementation of the StoreReferenceData function to store reference
267 data in a dedicated CDB storage.
268
269 Revision 1.11  2006/07/21 07:37:20  jgrosseo
270 last run is stored after each run
271
272 Revision 1.10  2006/07/20 09:54:40  jgrosseo
273 introducing status management: The processing per subdetector is divided into several steps,
274 after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
275 can keep track of the number of failures and skips further processing after a certain threshold is
276 exceeded. These thresholds can be configured in LDAP.
277
278 Revision 1.9  2006/07/19 10:09:55  jgrosseo
279 new configuration, accesst to DAQ FES (Alberto)
280
281 Revision 1.8  2006/07/11 12:44:36  jgrosseo
282 adding parameters for extended validity range of data produced by preprocessor
283
284 Revision 1.7  2006/07/10 14:37:09  jgrosseo
285 small fix + todo comment
286
287 Revision 1.6  2006/07/10 13:01:41  jgrosseo
288 enhanced storing of last sucessfully processed run (alberto)
289
290 Revision 1.5  2006/07/04 14:59:57  jgrosseo
291 revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
292
293 Revision 1.4  2006/06/12 09:11:16  jgrosseo
294 coding conventions (Alberto)
295
296 Revision 1.3  2006/06/06 14:26:40  jgrosseo
297 o) removed files that were moved to STEER
298 o) shuttle updated to follow the new interface (Alberto)
299
300 Revision 1.2  2006/03/07 07:52:34  hristov
301 New version (B.Yordanov)
302
303 Revision 1.6  2005/11/19 17:19:14  byordano
304 RetrieveDATEEntries and RetrieveConditionsData added
305
306 Revision 1.5  2005/11/19 11:09:27  byordano
307 AliShuttle declaration added
308
309 Revision 1.4  2005/11/17 17:47:34  byordano
310 TList changed to TObjArray
311
312 Revision 1.3  2005/11/17 14:43:23  byordano
313 import to local CVS
314
315 Revision 1.1.1.1  2005/10/28 07:33:58  hristov
316 Initial import as subdirectory in AliRoot
317
318 Revision 1.2  2005/09/13 08:41:15  byordano
319 default startTime endTime added
320
321 Revision 1.4  2005/08/30 09:13:02  byordano
322 some docs added
323
324 Revision 1.3  2005/08/29 21:15:47  byordano
325 some docs added
326
327 */
328
329 //
330 // This class is the main manager for AliShuttle.
331 // It organizes the data retrieval from DCS and calls the
332 // interface methods of AliPreprocessor.
333 // For every detector in AliShuttleConfig (see AliShuttleConfig),
334 // data for its set of aliases is retrieved. If there is a registered
335 // AliPreprocessor for this detector then it will be used
336 // according to the schema (see AliPreprocessor).
337 // If there isn't a registered AliPreprocessor then the retrieved
338 // data is stored automatically to the underlying AliCDBStorage.
339 // For detSpec the alias name is used.
340 //
341
342 #include "AliShuttle.h"
343
344 #include "AliCDBManager.h"
345 #include "AliCDBStorage.h"
346 #include "AliCDBId.h"
347 #include "AliCDBRunRange.h"
348 #include "AliCDBPath.h"
349 #include "AliCDBEntry.h"
350 #include "AliShuttleConfig.h"
351 #include "DCSClient/AliDCSClient.h"
352 #include "AliLog.h"
353 #include "AliPreprocessor.h"
354 #include "AliShuttleStatus.h"
355 #include "AliShuttleLogbookEntry.h"
356
357 #include <TSystem.h>
358 #include <TObject.h>
359 #include <TString.h>
360 #include <TTimeStamp.h>
361 #include <TObjString.h>
362 #include <TSQLServer.h>
363 #include <TSQLResult.h>
364 #include <TSQLRow.h>
365 #include <TMutex.h>
366 #include <TSystemDirectory.h>
367 #include <TSystemFile.h>
368 #include <TFile.h>
369 #include <TGrid.h>
370 #include <TGridResult.h>
371
372 #include <TMonaLisaWriter.h>
373
374 #include <fstream>
375
376 #include <sys/types.h>
377 #include <sys/wait.h>
378
379 ClassImp(AliShuttle)
380
381 //______________________________________________________________________________________________
382 AliShuttle::AliShuttle(const AliShuttleConfig* config,
383                 UInt_t timeout, Int_t retries):
384 fConfig(config),
385 fTimeout(timeout), fRetries(retries),
386 fPreprocessorMap(),
387 fLogbookEntry(0),
388 fCurrentDetector(),
389 fStatusEntry(0),
390 fMonitoringMutex(0),
391 fLastActionTime(0),
392 fLastAction(),
393 fMonaLisa(0),
394 fTestMode(kNone),
395 fReadTestMode(kFALSE),
396 fOutputRedirected(kFALSE)
397 {
398         //
399         // config: AliShuttleConfig used
400         // timeout: timeout used for AliDCSClient connection
401         // retries: the number of retries in case of connection error.
402         //
403
404         if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
405         for(int iSys=0;iSys<4;iSys++) {
406                 fServer[iSys]=0;
407                 if (iSys < 3)
408                         fFXSlist[iSys].SetOwner(kTRUE);
409         }
410         fPreprocessorMap.SetOwner(kTRUE);
411
412         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
413                 fFirstUnprocessed[iDet] = kFALSE;
414
415         fMonitoringMutex = new TMutex();
416 }
417
418 //______________________________________________________________________________________________
419 AliShuttle::~AliShuttle()
420 {
421         //
422         // destructor
423         //
424
425         fPreprocessorMap.DeleteAll();
426         for(int iSys=0;iSys<4;iSys++)
427                 if(fServer[iSys]) {
428                         fServer[iSys]->Close();
429                         delete fServer[iSys];
430                         fServer[iSys] = 0;
431                 }
432
433         if (fStatusEntry){
434                 delete fStatusEntry;
435                 fStatusEntry = 0;
436         }
437         
438         if (fMonitoringMutex) 
439         {
440                 delete fMonitoringMutex;
441                 fMonitoringMutex = 0;
442         }
443 }
444
445 //______________________________________________________________________________________________
446 void AliShuttle::RegisterPreprocessor(AliPreprocessor* preprocessor)
447 {
448         //
449         // Registers new AliPreprocessor.
450         // It uses GetName() for indentificator of the pre processor.
451         // The pre processor is registered it there isn't any other
452         // with the same identificator (GetName()).
453         //
454
455         const char* detName = preprocessor->GetName();
456         if(GetDetPos(detName) < 0)
457                 AliFatal(Form("********** !!!!! Invalid detector name: %s !!!!! **********", detName));
458
459         if (fPreprocessorMap.GetValue(detName)) {
460                 AliWarning(Form("AliPreprocessor %s is already registered!", detName));
461                 return;
462         }
463
464         fPreprocessorMap.Add(new TObjString(detName), preprocessor);
465 }
466 //______________________________________________________________________________________________
467 Bool_t AliShuttle::Store(const AliCDBPath& path, TObject* object,
468                 AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite)
469 {
470         // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for
471         // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored
472         // using this function. Use StoreReferenceData instead!
473         // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
474         // finishes the data are transferred to the main storage (Grid).
475
476         return StoreLocally(fgkLocalCDB, path, object, metaData, validityStart, validityInfinite);
477 }
478
479 //______________________________________________________________________________________________
480 Bool_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData)
481 {
482         // Stores a CDB object in the storage for reference data. This objects will not be available during
483         // offline reconstrunction. Use this function for reference data only!
484         // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
485         // finishes the data are transferred to the main storage (Grid).
486
487         return StoreLocally(fgkLocalRefStorage, path, object, metaData);
488 }
489
490 //______________________________________________________________________________________________
491 Bool_t AliShuttle::StoreLocally(const TString& localUri,
492                         const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
493                         Int_t validityStart, Bool_t validityInfinite)
494 {
495         // Store object temporarily in local storage. Parameters are passed by Store and StoreReferenceData functions.
496         // when the preprocessor finishes the data are transferred to the main storage (Grid).
497         // The parameters are:
498         //   1) Uri of the backup storage (Local)
499         //   2) the object's path.
500         //   3) the object to be stored
501         //   4) the metaData to be associated with the object
502         //   5) the validity start run number w.r.t. the current run,
503         //      if the data is valid only for this run leave the default 0
504         //   6) specifies if the calibration data is valid for infinity (this means until updated),
505         //      typical for calibration runs, the default is kFALSE
506         //
507         // returns 0 if fail, 1 otherwise
508
509         if (fTestMode & kErrorStorage)
510         {
511                 Log(fCurrentDetector, "StoreLocally - In TESTMODE - Simulating error while storing locally");
512                 return kFALSE;
513         }
514         
515         const char* cdbType = (localUri == fgkLocalCDB) ? "CDB" : "Reference";
516
517         Int_t firstRun = GetCurrentRun() - validityStart;
518         if(firstRun < 0) {
519                 AliWarning("First valid run happens to be less than 0! Setting it to 0.");
520                 firstRun=0;
521         }
522
523         Int_t lastRun = -1;
524         if(validityInfinite) {
525                 lastRun = AliCDBRunRange::Infinity();
526         } else {
527                 lastRun = GetCurrentRun();
528         }
529
530         // Version is set to current run, it will be used later to transfer data to Grid
531         AliCDBId id(path, firstRun, lastRun, GetCurrentRun(), -1);
532
533         if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){
534                 TObjString runUsed = Form("%d", GetCurrentRun());
535                 metaData->SetProperty("RunUsed(TObjString)", runUsed.Clone());
536         }
537
538         Bool_t result = kFALSE;
539
540         if (!(AliCDBManager::Instance()->GetStorage(localUri))) {
541                 Log("SHUTTLE", Form("StoreLocally - Cannot activate local %s storage", cdbType));
542         } else {
543                 result = AliCDBManager::Instance()->GetStorage(localUri)
544                                         ->Put(object, id, metaData);
545         }
546
547         if(!result) {
548
549                 Log(fCurrentDetector, Form("StoreLocally - Can't store object <%s>!", id.ToString().Data()));
550         }
551
552         return result;
553 }
554
555 //______________________________________________________________________________________________
556 Bool_t AliShuttle::StoreOCDB()
557 {
558         //
559         // Called when preprocessor ends successfully or when previous storage attempt failed (kStoreError status)
560         // Calls underlying StoreOCDB(const char*) function twice, for OCDB and Reference storage.
561         // Then calls StoreRefFilesToGrid to store reference files. 
562         //
563         
564         if (fTestMode & kErrorGrid)
565         {
566                 Log("SHUTTLE", "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
567                 Log(fCurrentDetector, "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
568                 return kFALSE;
569         }
570         
571         Log("SHUTTLE","StoreOCDB - Storing OCDB data ...");
572         Bool_t resultCDB = StoreOCDB(fgkMainCDB);
573
574         Log("SHUTTLE","StoreOCDB - Storing reference data ...");
575         Bool_t resultRef = StoreOCDB(fgkMainRefStorage);
576         
577         Log("SHUTTLE","StoreOCDB - Storing reference files ...");
578         Bool_t resultRefFiles = CopyFilesToGrid("reference");
579         
580         Bool_t resultMetadata = kTRUE;
581         if(fCurrentDetector == "GRP") 
582         {
583                 Log("StoreOCDB - SHUTTLE","Storing Run Metadata file ...");
584                 resultMetadata = CopyFilesToGrid("metadata");
585         }
586         
587         return resultCDB && resultRef && resultRefFiles && resultMetadata;
588 }
589
590 //______________________________________________________________________________________________
591 Bool_t AliShuttle::StoreOCDB(const TString& gridURI)
592 {
593         //
594         // Called by StoreOCDB(), performs actual storage to the main OCDB and reference storages (Grid)
595         //
596
597         TObjArray* gridIds=0;
598
599         Bool_t result = kTRUE;
600
601         const char* type = 0;
602         TString localURI;
603         if(gridURI == fgkMainCDB) {
604                 type = "OCDB";
605                 localURI = fgkLocalCDB;
606         } else if(gridURI == fgkMainRefStorage) {
607                 type = "reference";
608                 localURI = fgkLocalRefStorage;
609         } else {
610                 AliError(Form("Invalid storage URI: %s", gridURI.Data()));
611                 return kFALSE;
612         }
613
614         AliCDBManager* man = AliCDBManager::Instance();
615
616         AliCDBStorage *gridSto = man->GetStorage(gridURI);
617         if(!gridSto) {
618                 Log("SHUTTLE",
619                         Form("StoreOCDB - cannot activate main %s storage", type));
620                 return kFALSE;
621         }
622
623         gridIds = gridSto->GetQueryCDBList();
624
625         // get objects previously stored in local CDB
626         AliCDBStorage *localSto = man->GetStorage(localURI);
627         if(!localSto) {
628                 Log("SHUTTLE",
629                         Form("StoreOCDB - cannot activate local %s storage", type));
630                 return kFALSE;
631         }
632         AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*");
633         // Local objects were stored with current run as Grid version!
634         TList* localEntries = localSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun());
635         localEntries->SetOwner(1);
636
637         // loop on local stored objects
638         TIter localIter(localEntries);
639         AliCDBEntry *aLocEntry = 0;
640         while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){
641                 aLocEntry->SetOwner(1);
642                 AliCDBId aLocId = aLocEntry->GetId();
643                 aLocEntry->SetVersion(-1);
644                 aLocEntry->SetSubVersion(-1);
645
646                 // If local object is valid up to infinity we store it only if it is
647                 // the first unprocessed run!
648                 if (aLocId.GetLastRun() == AliCDBRunRange::Infinity() &&
649                         !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
650                 {
651                         Log("SHUTTLE", Form("StoreOCDB - %s: object %s has validity infinite but "
652                                                 "there are previous unprocessed runs!",
653                                                 fCurrentDetector.Data(), aLocId.GetPath().Data()));
654                         continue;
655                 }
656
657                 // loop on Grid valid Id's
658                 Bool_t store = kTRUE;
659                 TIter gridIter(gridIds);
660                 AliCDBId* aGridId = 0;
661                 while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
662                         if(aGridId->GetPath() != aLocId.GetPath()) continue;
663                         // skip all objects valid up to infinity
664                         if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
665                         // if we get here, it means there's already some more recent object stored on Grid!
666                         store = kFALSE;
667                         break;
668                 }
669
670                 // If we get here, the file can be stored!
671                 Bool_t storeOk = gridSto->Put(aLocEntry);
672                 if(!store || storeOk){
673
674                         if (!store)
675                         {
676                                 Log(fCurrentDetector.Data(),
677                                         Form("StoreOCDB - A more recent object already exists in %s storage: <%s>",
678                                                 type, aGridId->ToString().Data()));
679                         } else {
680                                 Log("SHUTTLE",
681                                         Form("StoreOCDB - Object <%s> successfully put into %s storage",
682                                                 aLocId.ToString().Data(), type));
683                                 Log(fCurrentDetector.Data(),
684                                         Form("StoreOCDB - Object <%s> successfully put into %s storage",
685                                                 aLocId.ToString().Data(), type));
686                         }
687
688                         // removing local filename...
689                         TString filename;
690                         localSto->IdToFilename(aLocId, filename);
691                         Log("SHUTTLE", Form("StoreOCDB - Removing local file %s", filename.Data()));
692                         RemoveFile(filename.Data());
693                         continue;
694                 } else  {
695                         Log("SHUTTLE",
696                                 Form("StoreOCDB - Grid %s storage of object <%s> failed",
697                                         type, aLocId.ToString().Data()));
698                         Log(fCurrentDetector.Data(),
699                                 Form("StoreOCDB - Grid %s storage of object <%s> failed",
700                                         type, aLocId.ToString().Data()));
701                         result = kFALSE;
702                 }
703         }
704         localEntries->Clear();
705
706         return result;
707 }
708
709 //______________________________________________________________________________________________
710 Bool_t AliShuttle::CleanReferenceStorage(const char* detector)
711 {
712         // clears the directory used to store reference files of a given subdetector
713   
714         AliCDBManager* man = AliCDBManager::Instance();
715         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
716         TString localBaseFolder = sto->GetBaseFolder();
717
718         TString targetDir = GetRefFilePrefix(localBaseFolder.Data(), detector);
719         
720         Log("SHUTTLE", Form("CleanReferenceStorage - Cleaning %s", targetDir.Data()));
721
722         TString begin;
723         begin.Form("%d_", GetCurrentRun());
724         
725         TSystemDirectory* baseDir = new TSystemDirectory("/", targetDir);
726         if (!baseDir)
727                 return kTRUE;
728                 
729         TList* dirList = baseDir->GetListOfFiles();
730         delete baseDir;
731         
732         if (!dirList) return kTRUE;
733                         
734         if (dirList->GetEntries() < 3) 
735         {
736                 delete dirList;
737                 return kTRUE;
738         }
739                                 
740         Int_t nDirs = 0, nDel = 0;
741         TIter dirIter(dirList);
742         TSystemFile* entry = 0;
743
744         Bool_t success = kTRUE;
745         
746         while ((entry = dynamic_cast<TSystemFile*> (dirIter.Next())))
747         {                                       
748                 if (entry->IsDirectory())
749                         continue;
750                 
751                 TString fileName(entry->GetName());
752                 if (!fileName.BeginsWith(begin))
753                         continue;
754                         
755                 nDirs++;
756                                                 
757                 // delete file
758                 Int_t result = gSystem->Unlink(fileName.Data());
759                 
760                 if (result)
761                 {
762                         Log("SHUTTLE", Form("CleanReferenceStorage - Could not delete file %s!", fileName.Data()));
763                         success = kFALSE;
764                 } else {
765                         nDel++;
766                 }
767         }
768
769         if(nDirs > 0)
770                 Log("SHUTTLE", Form("CleanReferenceStorage - %d (over %d) reference files in folder %s were deleted.", 
771                         nDel, nDirs, targetDir.Data()));
772
773                 
774         delete dirList;
775         return success;
776
777
778
779
780
781
782   Int_t result = gSystem->GetPathInfo(targetDir, 0, (Long64_t*) 0, 0, 0);
783   if (result == 0)
784   {
785     // delete directory
786     result = gSystem->Exec(Form("rm -rf %s", targetDir.Data()));
787     if (result != 0)
788     {  
789       Log("SHUTTLE", Form("CleanReferenceStorage - Could not clean directory %s", targetDir.Data()));
790       return kFALSE;
791     }
792   }
793
794   result = gSystem->mkdir(targetDir, kTRUE);
795   if (result != 0)
796   {
797     Log("SHUTTLE", Form("CleanReferenceStorage - Error creating base directory %s", targetDir.Data()));
798     return kFALSE;
799   }
800         
801   return kTRUE;
802 }
803
804 //______________________________________________________________________________________________
805 Bool_t AliShuttle::StoreReferenceFile(const char* detector, const char* localFile, const char* gridFileName)
806 {
807         //
808         // Stores reference file directly (without opening it). This function stores the file locally.
809         //
810         // The file is stored under the following location: 
811         // <base folder of local reference storage>/<DET>/<RUN#>_<gridFileName>
812         // where <gridFileName> is the second parameter given to the function
813         // 
814         
815         if (fTestMode & kErrorStorage)
816         {
817                 Log(fCurrentDetector, "StoreReferenceFile - In TESTMODE - Simulating error while storing locally");
818                 return kFALSE;
819         }
820         
821         AliCDBManager* man = AliCDBManager::Instance();
822         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
823         
824         TString localBaseFolder = sto->GetBaseFolder();
825         
826         TString target = GetRefFilePrefix(localBaseFolder.Data(), detector);    
827         target.Append(Form("/%d_%s", GetCurrentRun(), gridFileName));
828         
829         return CopyFileLocally(localFile, target);
830 }
831
832 //______________________________________________________________________________________________
833 Bool_t AliShuttle::StoreRunMetadataFile(const char* localFile, const char* gridFileName)
834 {
835         //
836         // Stores Run metadata file to the Grid, in the run folder
837         //
838         // Only GRP can call this function.
839         
840         if (fTestMode & kErrorStorage)
841         {
842                 Log(fCurrentDetector, "StoreRunMetaDataFile - In TESTMODE - Simulating error while storing locally");
843                 return kFALSE;
844         }
845         
846         AliCDBManager* man = AliCDBManager::Instance();
847         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
848         
849         TString localBaseFolder = sto->GetBaseFolder();
850         
851         // Build Run level folder
852         // folder = /alice/data/year/lhcPeriod/runNb/Raw
853         
854                 
855         TString lhcPeriod = GetLHCPeriod();     
856         if (lhcPeriod.Length() == 0) 
857         {
858                 Log("SHUTTLE","StoreRunMetaDataFile - LHCPeriod not found in logbook!");
859                 return 0;
860         }
861         
862         TString target = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/Raw/%s", 
863                                 localBaseFolder.Data(), GetCurrentYear(), 
864                                 lhcPeriod.Data(), GetCurrentRun(), gridFileName);
865                                         
866         return CopyFileLocally(localFile, target);
867 }
868
869 //______________________________________________________________________________________________
870 Bool_t AliShuttle::CopyFileLocally(const char* localFile, const TString& target)
871 {
872         //
873         // Stores file locally. Called by StoreReferenceFile and StoreRunMetadataFile
874         // Files are temporarily stored in the local reference storage. When the preprocessor 
875         // finishes, the Shuttle calls CopyFilesToGrid to transfer the files to AliEn 
876         // (in reference or run level folders)
877         //
878         
879         TString targetDir(target(0, target.Last('/')));
880         
881         //try to open base dir folder, if it does not exist
882         void* dir = gSystem->OpenDirectory(targetDir.Data());
883         if (dir == NULL) {
884                 if (gSystem->mkdir(targetDir.Data(), kTRUE)) {
885                         Log("SHUTTLE", Form("StoreFileLocally - Can't open directory <%s>", targetDir.Data()));
886                         return kFALSE;
887                 }
888
889         } else {
890                 gSystem->FreeDirectory(dir);
891         }
892         
893         Int_t result = 0;
894         
895         result = gSystem->GetPathInfo(localFile, 0, (Long64_t*) 0, 0, 0);
896         if (result)
897         {
898                 Log("SHUTTLE", Form("StoreFileLocally - %s does not exist", localFile));
899                 return kFALSE;
900         }
901
902         result = gSystem->GetPathInfo(target, 0, (Long64_t*) 0, 0, 0);
903         if (!result)
904         {
905                 Log("SHUTTLE", Form("StoreFileLocally - target file %s already exist, removing...", target.Data()));
906                 if (gSystem->Unlink(target.Data()))
907                 {
908                         Log("SHUTTLE", Form("StoreFileLocally - Could not remove existing target file %s!", target.Data()));
909                         return kFALSE;
910                 }
911         }       
912         
913         result = gSystem->CopyFile(localFile, target);
914
915         if (result == 0)
916         {
917                 Log("SHUTTLE", Form("StoreFileLocally - File %s stored locally to %s", localFile, target.Data()));
918                 return kTRUE;
919         }
920         else
921         {
922                 Log("SHUTTLE", Form("StoreFileLocally - Could not store file %s to %s! Error code = %d", 
923                                 localFile, target.Data(), result));
924                 return kFALSE;
925         }       
926
927
928
929 }
930
931 //______________________________________________________________________________________________
932 Bool_t AliShuttle::CopyFilesToGrid(const char* type)
933 {
934         //
935         // Transfers local files to the Grid. Local files can be reference files 
936         // or run metadata file (from GRP only).
937         //
938         // According to the type (ref, metadata) the files are stored under the following location: 
939         // ref --> <base folder of reference storage>/<DET>/<RUN#>_<gridFileName>
940         // metadata --> <run data folder>/<MetadataFileName>
941         //
942                 
943         AliCDBManager* man = AliCDBManager::Instance();
944         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
945         if (!sto)
946                 return kFALSE;
947         TString localBaseFolder = sto->GetBaseFolder();
948         
949         TString dir;
950         TString alienDir;
951         TString begin;
952         
953         if (strcmp(type, "reference") == 0) 
954         {
955                 dir = GetRefFilePrefix(localBaseFolder.Data(), fCurrentDetector.Data());
956                 AliCDBStorage* gridSto = man->GetStorage(fgkMainRefStorage);
957                 if (!gridSto)
958                         return kFALSE;
959                 TString gridBaseFolder = gridSto->GetBaseFolder();
960                 alienDir = GetRefFilePrefix(gridBaseFolder.Data(), fCurrentDetector.Data());
961                 begin = Form("%d_", GetCurrentRun());
962         } 
963         else if (strcmp(type, "metadata") == 0)
964         {
965                         
966                 TString lhcPeriod = GetLHCPeriod();
967         
968                 if (lhcPeriod.Length() == 0) 
969                 {
970                         Log("SHUTTLE","CopyFilesToGrid - LHCPeriod not found in logbook!");
971                         return 0;
972                 }
973                 
974                 dir = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/Raw", 
975                                 localBaseFolder.Data(), GetCurrentYear(), 
976                                 lhcPeriod.Data(), GetCurrentRun());
977                 alienDir = dir(dir.Index("/alice/data/"), dir.Length());
978                 
979                 begin = "";
980         }
981         else 
982         {
983                 Log("SHUTTLE", "CopyFilesToGrid - Unexpected: type label must be reference or metadata!");
984                 return kFALSE;
985         }
986                 
987         TSystemDirectory* baseDir = new TSystemDirectory("/", dir);
988         if (!baseDir)
989                 return kTRUE;
990                 
991         TList* dirList = baseDir->GetListOfFiles();
992         delete baseDir;
993         
994         if (!dirList) return kTRUE;
995                 
996         if (dirList->GetEntries() < 3) 
997         {
998                 delete dirList;
999                 return kTRUE;
1000         }
1001                         
1002         if (!gGrid)
1003         { 
1004                 Log("SHUTTLE", "CopyFilesToGrid - Connection to Grid failed: Cannot continue!");
1005                 delete dirList;
1006                 return kFALSE;
1007         }
1008         
1009         Int_t nDirs = 0, nTransfer = 0;
1010         TIter dirIter(dirList);
1011         TSystemFile* entry = 0;
1012
1013         Bool_t success = kTRUE;
1014         Bool_t first = kTRUE;
1015         
1016         while ((entry = dynamic_cast<TSystemFile*> (dirIter.Next())))
1017         {                       
1018                 if (entry->IsDirectory())
1019                         continue;
1020                         
1021                 TString fileName(entry->GetName());
1022                 if (!fileName.BeginsWith(begin))
1023                         continue;
1024                         
1025                 nDirs++;
1026                         
1027                 if (first)
1028                 {
1029                         first = kFALSE;
1030                         // check that folder exists, otherwise create it
1031                         TGridResult* result = gGrid->Ls(alienDir.Data(), "a");
1032                         
1033                         if (!result)
1034                         {
1035                                 delete dirList;
1036                                 return kFALSE;
1037                         }
1038                         
1039                         if (!result->GetFileName(1)) // TODO: It looks like element 0 is always 0!!
1040                         {
1041                                 // TODO It does not work currently! Bug in TAliEn::Mkdir
1042                                 // TODO Manually fixed in local root v5-16-00
1043                                 if (!gGrid->Mkdir(alienDir.Data(),"-p",0))
1044                                 {
1045                                         Log("SHUTTLE", Form("CopyFilesToGrid - Cannot create directory %s",
1046                                                         alienDir.Data()));
1047                                         delete dirList;
1048                                         return kFALSE;
1049                                 } else {
1050                                         Log("SHUTTLE",Form("CopyFilesToGrid - Folder %s created", alienDir.Data()));
1051                                 }
1052                                 
1053                         } else {
1054                                         Log("SHUTTLE",Form("CopyFilesToGrid - Folder %s found", alienDir.Data()));
1055                         }
1056                 }
1057                         
1058                 TString fullLocalPath;
1059                 fullLocalPath.Form("%s/%s", dir.Data(), fileName.Data());
1060                 
1061                 TString fullGridPath;
1062                 fullGridPath.Form("alien://%s/%s", alienDir.Data(), fileName.Data());
1063
1064                 Bool_t result = TFile::Cp(fullLocalPath, fullGridPath);
1065                 
1066                 if (result)
1067                 {
1068                         Log("SHUTTLE", Form("CopyFilesToGrid - Copying local file %s to %s succeeded!", 
1069                                                 fullLocalPath.Data(), fullGridPath.Data()));
1070                         RemoveFile(fullLocalPath);
1071                         nTransfer++;
1072                 }
1073                 else
1074                 {
1075                         Log("SHUTTLE", Form("CopyFilesToGrid - Copying local file %s to %s FAILED!", 
1076                                                 fullLocalPath.Data(), fullGridPath.Data()));
1077                         success = kFALSE;
1078                 }
1079         }
1080
1081         Log("SHUTTLE", Form("CopyFilesToGrid - %d (over %d) files in folder %s copied to Grid.", 
1082                                                 nTransfer, nDirs, dir.Data()));
1083
1084                 
1085         delete dirList;
1086         return success;
1087 }
1088
1089 //______________________________________________________________________________________________
1090 const char* AliShuttle::GetRefFilePrefix(const char* base, const char* detector)
1091 {
1092         //
1093         // Get folder name of reference files 
1094         //
1095
1096         TString offDetStr(GetOfflineDetName(detector));
1097         TString dir;
1098         if (offDetStr == "ITS" || offDetStr == "MUON" || offDetStr == "PHOS")
1099         {
1100                 dir.Form("%s/%s/%s", base, offDetStr.Data(), detector);
1101         } else {
1102                 dir.Form("%s/%s", base, offDetStr.Data());
1103         }
1104         
1105         return dir.Data();
1106         
1107
1108 }
1109
1110 //______________________________________________________________________________________________
1111 void AliShuttle::CleanLocalStorage(const TString& uri)
1112 {
1113         //
1114         // Called in case the preprocessor is declared failed. Remove remaining objects from the local storages.
1115         //
1116
1117         const char* type = 0;
1118         if(uri == fgkLocalCDB) {
1119                 type = "OCDB";
1120         } else if(uri == fgkLocalRefStorage) {
1121                 type = "Reference";
1122         } else {
1123                 AliError(Form("Invalid storage URI: %s", uri.Data()));
1124                 return;
1125         }
1126
1127         AliCDBManager* man = AliCDBManager::Instance();
1128
1129         // open local storage
1130         AliCDBStorage *localSto = man->GetStorage(uri);
1131         if(!localSto) {
1132                 Log("SHUTTLE",
1133                         Form("CleanLocalStorage - cannot activate local %s storage", type));
1134                 return;
1135         }
1136
1137         TString filename(Form("%s/%s/*/Run*_v%d_s*.root",
1138                 localSto->GetBaseFolder().Data(), GetOfflineDetName(fCurrentDetector.Data()), GetCurrentRun()));
1139
1140         AliDebug(2, Form("filename = %s", filename.Data()));
1141
1142         Log("SHUTTLE", Form("Removing remaining local files for run %d and detector %s ...",
1143                 GetCurrentRun(), fCurrentDetector.Data()));
1144
1145         RemoveFile(filename.Data());
1146
1147 }
1148
1149 //______________________________________________________________________________________________
1150 void AliShuttle::RemoveFile(const char* filename)
1151 {
1152         //
1153         // removes local file
1154         //
1155
1156         TString command(Form("rm -f %s", filename));
1157
1158         Int_t result = gSystem->Exec(command.Data());
1159         if(result != 0)
1160         {
1161                 Log("SHUTTLE", Form("RemoveFile - %s: Cannot remove file %s!",
1162                         fCurrentDetector.Data(), filename));
1163         }
1164 }
1165
1166 //______________________________________________________________________________________________
1167 AliShuttleStatus* AliShuttle::ReadShuttleStatus()
1168 {
1169         //
1170         // Reads the AliShuttleStatus from the CDB
1171         //
1172
1173         if (fStatusEntry){
1174                 delete fStatusEntry;
1175                 fStatusEntry = 0;
1176         }
1177
1178         fStatusEntry = AliCDBManager::Instance()->GetStorage(GetLocalCDB())
1179                 ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
1180
1181         if (!fStatusEntry) return 0;
1182         fStatusEntry->SetOwner(1);
1183
1184         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1185         if (!status) {
1186                 AliError("Invalid object stored to CDB!");
1187                 return 0;
1188         }
1189
1190         return status;
1191 }
1192
1193 //______________________________________________________________________________________________
1194 Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status)
1195 {
1196         //
1197         // writes the status for one subdetector
1198         //
1199
1200         if (fStatusEntry){
1201                 delete fStatusEntry;
1202                 fStatusEntry = 0;
1203         }
1204
1205         Int_t run = GetCurrentRun();
1206
1207         AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
1208
1209         fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
1210         fStatusEntry->SetOwner(1);
1211
1212         UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
1213
1214         if (!result) {
1215                 Log("SHUTTLE", Form("WriteShuttleStatus - Failed for %s, run %d",
1216                                                 fCurrentDetector.Data(), run));
1217                 return kFALSE;
1218         }
1219         
1220         SendMLInfo();
1221
1222         return kTRUE;
1223 }
1224
1225 //______________________________________________________________________________________________
1226 void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount)
1227 {
1228         //
1229         // changes the AliShuttleStatus for the given detector and run to the given status
1230         //
1231
1232         if (!fStatusEntry){
1233                 AliError("UNEXPECTED: fStatusEntry empty");
1234                 return;
1235         }
1236
1237         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1238
1239         if (!status){
1240                 Log("SHUTTLE", "UpdateShuttleStatus - UNEXPECTED: status could not be read from current CDB entry");
1241                 return;
1242         }
1243
1244         TString actionStr = Form("UpdateShuttleStatus - %s: Changing state from %s to %s",
1245                                 fCurrentDetector.Data(),
1246                                 status->GetStatusName(),
1247                                 status->GetStatusName(newStatus));
1248         Log("SHUTTLE", actionStr);
1249         SetLastAction(actionStr);
1250
1251         status->SetStatus(newStatus);
1252         if (increaseCount) status->IncreaseCount();
1253
1254         AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
1255
1256         SendMLInfo();
1257 }
1258
1259 //______________________________________________________________________________________________
1260 void AliShuttle::SendMLInfo()
1261 {
1262         //
1263         // sends ML information about the current status of the current detector being processed
1264         //
1265         
1266         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1267         
1268         if (!status){
1269                 Log("SHUTTLE", "SendMLInfo - UNEXPECTED: status could not be read from current CDB entry");
1270                 return;
1271         }
1272         
1273         TMonaLisaText  mlStatus(Form("%s_status", fCurrentDetector.Data()), status->GetStatusName());
1274         TMonaLisaValue mlRetryCount(Form("%s_count", fCurrentDetector.Data()), status->GetCount());
1275
1276         TList mlList;
1277         mlList.Add(&mlStatus);
1278         mlList.Add(&mlRetryCount);
1279
1280         TString mlID;
1281         mlID.Form("%d", GetCurrentRun());
1282         fMonaLisa->SendParameters(&mlList, mlID);
1283 }
1284
1285 //______________________________________________________________________________________________
1286 Bool_t AliShuttle::ContinueProcessing()
1287 {
1288         // this function reads the AliShuttleStatus information from CDB and
1289         // checks if the processing should be continued
1290         // if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
1291
1292         if (!fConfig->HostProcessDetector(fCurrentDetector)) return kFALSE;
1293
1294         AliPreprocessor* aPreprocessor =
1295                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
1296         if (!aPreprocessor)
1297         {
1298                 Log("SHUTTLE", Form("ContinueProcessing - %s: no preprocessor registered", fCurrentDetector.Data()));
1299                 return kFALSE;
1300         }
1301
1302         AliShuttleLogbookEntry::Status entryStatus =
1303                 fLogbookEntry->GetDetectorStatus(fCurrentDetector);
1304
1305         if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
1306                 Log("SHUTTLE", Form("ContinueProcessing - %s is %s",
1307                                 fCurrentDetector.Data(),
1308                                 fLogbookEntry->GetDetectorStatusName(entryStatus)));
1309                 return kFALSE;
1310         }
1311
1312         // if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state
1313
1314         // check if current run is first unprocessed run for current detector
1315         if (fConfig->StrictRunOrder(fCurrentDetector) &&
1316                 !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
1317         {
1318                 if (fTestMode == kNone)
1319                 {
1320                         Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering"
1321                                         " but this is not the first unprocessed run!"));
1322                         return kFALSE;
1323                 }
1324                 else
1325                 {
1326                         Log("SHUTTLE", Form("ContinueProcessing - In TESTMODE - "
1327                                         "Although %s requires strict run ordering "
1328                                         "and this is not the first unprocessed run, "
1329                                         "the SHUTTLE continues"));
1330                 }
1331         }
1332
1333         AliShuttleStatus* status = ReadShuttleStatus();
1334         if (!status) {
1335                 // first time
1336                 Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
1337                                 fCurrentDetector.Data()));
1338                 status = new AliShuttleStatus(AliShuttleStatus::kStarted);
1339                 return WriteShuttleStatus(status);
1340         }
1341
1342         // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
1343         // If it happens it may mean Logbook updating failed... let's do it now!
1344         if (status->GetStatus() == AliShuttleStatus::kDone ||
1345             status->GetStatus() == AliShuttleStatus::kFailed){
1346                 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
1347                                         fCurrentDetector.Data(),
1348                                         status->GetStatusName(status->GetStatus())));
1349                 UpdateShuttleLogbook(fCurrentDetector.Data(),
1350                                         status->GetStatusName(status->GetStatus()));
1351                 return kFALSE;
1352         }
1353
1354         if (status->GetStatus() == AliShuttleStatus::kStoreError) {
1355                 Log("SHUTTLE",
1356                         Form("ContinueProcessing - %s: Grid storage of one or more "
1357                                 "objects failed. Trying again now",
1358                                 fCurrentDetector.Data()));
1359                 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
1360                 if (StoreOCDB()){
1361                         Log("SHUTTLE", Form("ContinueProcessing - %s: all objects "
1362                                 "successfully stored into main storage",
1363                                 fCurrentDetector.Data()));
1364                 } else {
1365                         Log("SHUTTLE",
1366                                 Form("ContinueProcessing - %s: Grid storage failed again",
1367                                         fCurrentDetector.Data()));
1368                         UpdateShuttleStatus(AliShuttleStatus::kStoreError);
1369                 }
1370                 return kFALSE;
1371         }
1372
1373         // if we get here, there is a restart
1374         Bool_t cont = kFALSE;
1375
1376         // abort conditions
1377         if (status->GetCount() >= fConfig->GetMaxRetries()) {
1378                 Log("SHUTTLE", Form("ContinueProcessing - %s failed %d times in status %s - "
1379                                 "Updating Shuttle Logbook", fCurrentDetector.Data(),
1380                                 status->GetCount(), status->GetStatusName()));
1381                 UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
1382                 UpdateShuttleStatus(AliShuttleStatus::kFailed);
1383
1384                 // there may still be objects in local OCDB and reference storage
1385                 // and FXS databases may be not updated: do it now!
1386                 
1387                 // TODO Currently disabled, we want to keep files in case of failure!
1388                 // CleanLocalStorage(fgkLocalCDB);
1389                 // CleanLocalStorage(fgkLocalRefStorage);
1390                 // UpdateTableFailCase();
1391                 
1392                 // Send mail to detector expert!
1393                 Log("SHUTTLE", Form("ContinueProcessing - Sending mail to %s expert...", 
1394                                         fCurrentDetector.Data()));
1395                 if (!SendMail())
1396                         Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert",
1397                                         fCurrentDetector.Data()));
1398
1399         } else {
1400                 Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. "
1401                                 "Aborted before with %s. Retry number %d.", fCurrentDetector.Data(),
1402                                 status->GetStatusName(), status->GetCount()));
1403                 Bool_t increaseCount = kTRUE;
1404                 if (status->GetStatus() == AliShuttleStatus::kDCSError || 
1405                         status->GetStatus() == AliShuttleStatus::kDCSStarted)
1406                                 increaseCount = kFALSE;
1407                                 
1408                 UpdateShuttleStatus(AliShuttleStatus::kStarted, increaseCount);
1409                 cont = kTRUE;
1410         }
1411
1412         return cont;
1413 }
1414
1415 //______________________________________________________________________________________________
1416 Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
1417 {
1418         //
1419         // Makes data retrieval for all detectors in the configuration.
1420         // entry: Shuttle logbook entry, contains run paramenters and status of detectors
1421         // (Unprocessed, Inactive, Failed or Done).
1422         // Returns kFALSE in case of error occured and kTRUE otherwise
1423         //
1424
1425         if (!entry) return kFALSE;
1426
1427         fLogbookEntry = entry;
1428
1429         Log("SHUTTLE", Form("\t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^*",
1430                                         GetCurrentRun()));
1431
1432         // Send the information to ML
1433         TMonaLisaText  mlStatus("SHUTTLE_status", "Processing");
1434         TMonaLisaText  mlRunType("SHUTTLE_runtype", Form("%s (%s)", entry->GetRunType(), entry->GetRunParameter("log")));
1435
1436         TList mlList;
1437         mlList.Add(&mlStatus);
1438         mlList.Add(&mlRunType);
1439
1440         TString mlID;
1441         mlID.Form("%d", GetCurrentRun());
1442         fMonaLisa->SendParameters(&mlList, mlID);
1443
1444         if (fLogbookEntry->IsDone())
1445         {
1446                 Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook");
1447                 UpdateShuttleLogbook("shuttle_done");
1448                 fLogbookEntry = 0;
1449                 return kTRUE;
1450         }
1451
1452         // read test mode if flag is set
1453         if (fReadTestMode)
1454         {
1455                 fTestMode = kNone;
1456                 TString logEntry(entry->GetRunParameter("log"));
1457                 //printf("log entry = %s\n", logEntry.Data());
1458                 TString searchStr("Testmode: ");
1459                 Int_t pos = logEntry.Index(searchStr.Data());
1460                 //printf("%d\n", pos);
1461                 if (pos >= 0)
1462                 {
1463                         TSubString subStr = logEntry(pos + searchStr.Length(), logEntry.Length());
1464                         //printf("%s\n", subStr.String().Data());
1465                         TString newStr(subStr.Data());
1466                         TObjArray* token = newStr.Tokenize(' ');
1467                         if (token)
1468                         {
1469                                 //token->Print();
1470                                 TObjString* tmpStr = dynamic_cast<TObjString*> (token->First());
1471                                 if (tmpStr)
1472                                 {
1473                                         Int_t testMode = tmpStr->String().Atoi();
1474                                         if (testMode > 0)
1475                                         {
1476                                                 Log("SHUTTLE", Form("Process - Enabling test mode %d", testMode));
1477                                                 SetTestMode((TestMode) testMode);
1478                                         }
1479                                 }
1480                                 delete token;          
1481                         }
1482                 }
1483         }
1484                 
1485         fLogbookEntry->Print("all");
1486
1487         // Initialization
1488         Bool_t hasError = kFALSE;
1489
1490         // Set the CDB and Reference folders according to the year and LHC period
1491         TString lhcPeriod(GetLHCPeriod());
1492         if (lhcPeriod.Length() == 0) 
1493         {
1494                 Log("SHUTTLE","Process - LHCPeriod not found in logbook!");
1495                 return 0; 
1496         }       
1497         
1498         if (fgkMainCDB.Length() == 0)
1499                 fgkMainCDB = Form("alien://folder=/alice/data/%d/%s/OCDB?user=alidaq?cacheFold=/tmp/OCDBCache", 
1500                                         GetCurrentYear(), lhcPeriod.Data());
1501         
1502         if (fgkMainRefStorage.Length() == 0)
1503                 fgkMainRefStorage = Form("alien://folder=/alice/data/%d/%s/Reference?user=alidaq?cacheFold=/tmp/OCDBCache", 
1504                                         GetCurrentYear(), lhcPeriod.Data());
1505         
1506         AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
1507         if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
1508         AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
1509         if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
1510
1511         // Loop on detectors in the configuration
1512         TIter iter(fConfig->GetDetectors());
1513         TObjString* aDetector = 0;
1514
1515         while ((aDetector = (TObjString*) iter.Next()))
1516         {
1517                 fCurrentDetector = aDetector->String();
1518
1519                 if (ContinueProcessing() == kFALSE) continue;
1520
1521                 Log("SHUTTLE", Form("\t\t\t****** run %d - %s: START  ******",
1522                                                 GetCurrentRun(), aDetector->GetName()));
1523
1524                 for(Int_t iSys=0;iSys<3;iSys++) fFXSCalled[iSys]=kFALSE;
1525
1526                 Log(fCurrentDetector.Data(), "Process - Starting processing");
1527
1528                 Int_t pid = fork();
1529
1530                 if (pid < 0)
1531                 {
1532                         Log("SHUTTLE", "Process - ERROR: Forking failed");
1533                 }
1534                 else if (pid > 0)
1535                 {
1536                         // parent
1537                         Log("SHUTTLE", Form("Process - In parent process of %d - %s: Starting monitoring",
1538                                                         GetCurrentRun(), aDetector->GetName()));
1539
1540                         Long_t begin = time(0);
1541
1542                         int status; // to be used with waitpid, on purpose an int (not Int_t)!
1543                         while (waitpid(pid, &status, WNOHANG) == 0)
1544                         {
1545                                 Long_t expiredTime = time(0) - begin;
1546
1547                                 if (expiredTime > fConfig->GetPPTimeOut())
1548                                 {
1549                                         TString tmp;
1550                                         tmp.Form("Process - Process of %s time out. "
1551                                                         "Run time: %d seconds. Killing...",
1552                                                         fCurrentDetector.Data(), expiredTime);
1553                                         Log("SHUTTLE", tmp);
1554                                         Log(fCurrentDetector, tmp);
1555
1556                                         kill(pid, 9);
1557
1558                                         UpdateShuttleStatus(AliShuttleStatus::kPPTimeOut);
1559                                         hasError = kTRUE;
1560
1561                                         gSystem->Sleep(1000);
1562                                 }
1563                                 else
1564                                 {
1565                                         gSystem->Sleep(1000);
1566                                         
1567                                         TString checkStr;
1568                                         checkStr.Form("ps -o vsize --pid %d | tail -n 1", pid);
1569                                         FILE* pipe = gSystem->OpenPipe(checkStr, "r");
1570                                         if (!pipe)
1571                                         {
1572                                                 Log("SHUTTLE", Form("Process - Error: "
1573                                                         "Could not open pipe to %s", checkStr.Data()));
1574                                                 continue;
1575                                         }
1576                                                 
1577                                         char buffer[100];
1578                                         if (!fgets(buffer, 100, pipe))
1579                                         {
1580                                                 Log("SHUTTLE", "Process - Error: ps did not return anything");
1581                                                 gSystem->ClosePipe(pipe);
1582                                                 continue;
1583                                         }
1584                                         gSystem->ClosePipe(pipe);
1585                                         
1586                                         //Log("SHUTTLE", Form("ps returned %s", buffer));
1587                                         
1588                                         Int_t mem = 0;
1589                                         if ((sscanf(buffer, "%d\n", &mem) != 1) || !mem)
1590                                         {
1591                                                 Log("SHUTTLE", "Process - Error: Could not parse output of ps");
1592                                                 continue;
1593                                         }
1594                                         
1595                                         if (expiredTime % 60 == 0)
1596                                         {
1597                                                 Log("SHUTTLE", Form("Process - %s: Checking process. "
1598                                                         "Run time: %d seconds - Memory consumption: %d KB",
1599                                                         fCurrentDetector.Data(), expiredTime, mem));
1600                                                 SendAlive();
1601                                         }
1602                                         
1603                                         if (mem > fConfig->GetPPMaxMem())
1604                                         {
1605                                                 TString tmp;
1606                                                 tmp.Form("Process - Process exceeds maximum allowed memory "
1607                                                         "(%d KB > %d KB). Killing...",
1608                                                         mem, fConfig->GetPPMaxMem());
1609                                                 Log("SHUTTLE", tmp);
1610                                                 Log(fCurrentDetector, tmp);
1611         
1612                                                 kill(pid, 9);
1613         
1614                                                 UpdateShuttleStatus(AliShuttleStatus::kPPOutOfMemory);
1615                                                 hasError = kTRUE;
1616         
1617                                                 gSystem->Sleep(1000);
1618                                         }
1619                                 }
1620                         }
1621
1622                         Log("SHUTTLE", Form("Process - In parent process of %d - %s: Client has terminated.",
1623                                                                 GetCurrentRun(), aDetector->GetName()));
1624
1625                         if (WIFEXITED(status))
1626                         {
1627                                 Int_t returnCode = WEXITSTATUS(status);
1628
1629                                 Log("SHUTTLE", Form("Process - %s: the return code is %d", fCurrentDetector.Data(),
1630                                                                                 returnCode));
1631
1632                                 if (returnCode == 0) hasError = kTRUE;
1633                         }
1634                 }
1635                 else if (pid == 0)
1636                 {
1637                         // client
1638                         Log("SHUTTLE", Form("Process - In client process of %d - %s", GetCurrentRun(),
1639                                 aDetector->GetName()));
1640
1641                         Log("SHUTTLE", Form("Process - Redirecting output to %s log",fCurrentDetector.Data()));
1642
1643                         if ((freopen(GetLogFileName(fCurrentDetector), "a", stdout)) == 0)
1644                         {
1645                                 Log("SHUTTLE", "Process - Could not freopen stdout");
1646                         }
1647                         else
1648                         {
1649                                 fOutputRedirected = kTRUE;
1650                                 if ((dup2(fileno(stdout), fileno(stderr))) < 0)
1651                                         Log("SHUTTLE", "Process - Could not redirect stderr");
1652                                 
1653                         }
1654                         
1655                         TString wd = gSystem->WorkingDirectory();
1656                         TString tmpDir = Form("%s/%s_%d_process", GetShuttleTempDir(), 
1657                                 fCurrentDetector.Data(), GetCurrentRun());
1658                         
1659                         Int_t result = gSystem->GetPathInfo(tmpDir.Data(), 0, (Long64_t*) 0, 0, 0);
1660                         if (!result) // temp dir already exists!
1661                         {
1662                                 Log(fCurrentDetector.Data(), 
1663                                         Form("Process - %s dir already exists! Removing...", tmpDir.Data()));
1664                                 gSystem->Exec(Form("rm -rf %s",tmpDir.Data()));         
1665                         } 
1666                         
1667                         if (gSystem->mkdir(tmpDir.Data(), 1))
1668                         {
1669                                 Log(fCurrentDetector.Data(), "Process - could not make temp directory!!");
1670                                 gSystem->Exit(1);
1671                         }
1672                         
1673                         if (!gSystem->ChangeDirectory(tmpDir.Data())) 
1674                         {
1675                                 Log(fCurrentDetector.Data(), "Process - could not change directory!!");
1676                                 gSystem->Exit(1);                       
1677                         }
1678                         
1679                         Bool_t success = ProcessCurrentDetector();
1680                         
1681                         gSystem->ChangeDirectory(wd.Data());
1682                                                 
1683                         if (success) // Preprocessor finished successfully!
1684                         { 
1685                                 // remove temporary folder
1686                                 gSystem->Exec(Form("rm -rf %s",tmpDir.Data()));
1687                                 
1688                                 // Update time_processed field in FXS DB
1689                                 if (UpdateTable() == kFALSE)
1690                                         Log("SHUTTLE", Form("Process - %s: Could not update FXS databases!", 
1691                                                         fCurrentDetector.Data()));
1692
1693                                 // Transfer the data from local storage to main storage (Grid)
1694                                 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
1695                                 if (StoreOCDB() == kFALSE)
1696                                 {
1697                                         Log("SHUTTLE", 
1698                                                 Form("\t\t\t****** run %d - %s: STORAGE ERROR ******",
1699                                                         GetCurrentRun(), aDetector->GetName()));
1700                                         UpdateShuttleStatus(AliShuttleStatus::kStoreError);
1701                                         success = kFALSE;
1702                                 } else {
1703                                         Log("SHUTTLE", 
1704                                                 Form("\t\t\t****** run %d - %s: DONE ******",
1705                                                         GetCurrentRun(), aDetector->GetName()));
1706                                         UpdateShuttleStatus(AliShuttleStatus::kDone);
1707                                         UpdateShuttleLogbook(fCurrentDetector, "DONE");
1708                                 }
1709                         } else 
1710                         {
1711                                 Log("SHUTTLE", 
1712                                         Form("\t\t\t****** run %d - %s: PP ERROR ******",
1713                                                 GetCurrentRun(), aDetector->GetName()));
1714                         }
1715
1716                         for (UInt_t iSys=0; iSys<3; iSys++)
1717                         {
1718                                 if (fFXSCalled[iSys]) fFXSlist[iSys].Clear();
1719                         }
1720
1721                         Log("SHUTTLE", Form("Process - Client process of %d - %s is exiting now with %d.",
1722                                                         GetCurrentRun(), aDetector->GetName(), success));
1723
1724                         // the client exits here
1725                         gSystem->Exit(success);
1726
1727                         AliError("We should never get here!!!");
1728                 }
1729         }
1730
1731         Log("SHUTTLE", Form("\t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^*",
1732                                                         GetCurrentRun()));
1733
1734         //check if shuttle is done for this run, if so update logbook
1735         TObjArray checkEntryArray;
1736         checkEntryArray.SetOwner(1);
1737         TString whereClause = Form("where run=%d", GetCurrentRun());
1738         if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || 
1739                         checkEntryArray.GetEntries() == 0) {
1740                 Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!",
1741                                                 GetCurrentRun()));
1742                 return hasError == kFALSE;
1743         }
1744
1745         AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*>
1746                                                 (checkEntryArray.At(0));
1747
1748         if (checkEntry)
1749         {
1750                 if (checkEntry->IsDone())
1751                 {
1752                         Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook");
1753                         UpdateShuttleLogbook("shuttle_done");
1754                 }
1755                 else
1756                 {
1757                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1758                         {
1759                                 if (checkEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
1760                                 {
1761                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
1762                                                         checkEntry->GetRun(), GetDetName(iDet)));
1763                                         fFirstUnprocessed[iDet] = kFALSE;
1764                                 }
1765                         }
1766                 }
1767         }
1768
1769         fLogbookEntry = 0;
1770
1771         return hasError == kFALSE;
1772 }
1773
1774 //______________________________________________________________________________________________
1775 Bool_t AliShuttle::ProcessCurrentDetector()
1776 {
1777         //
1778         // Makes data retrieval just for a specific detector (fCurrentDetector).
1779         // Threre should be a configuration for this detector.
1780
1781         Log("SHUTTLE", Form("ProcessCurrentDetector - Retrieving values for %s, run %d", 
1782                                                 fCurrentDetector.Data(), GetCurrentRun()));
1783
1784         TString wd = gSystem->WorkingDirectory();
1785         
1786         if (!CleanReferenceStorage(fCurrentDetector.Data()))
1787                 return kFALSE;
1788         
1789         gSystem->ChangeDirectory(wd.Data());
1790         
1791         TMap* dcsMap = new TMap();
1792
1793         // call preprocessor
1794         AliPreprocessor* aPreprocessor =
1795                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
1796
1797         aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
1798
1799         Bool_t processDCS = aPreprocessor->ProcessDCS();
1800
1801         if (!processDCS)
1802         {
1803                 Log(fCurrentDetector, "ProcessCurrentDetector -"
1804                         " The preprocessor requested to skip the retrieval of DCS values");
1805         }
1806         else if (fTestMode & kSkipDCS)
1807         {
1808                 Log(fCurrentDetector, "ProcessCurrentDetector - In TESTMODE: Skipping DCS processing");
1809         } 
1810         else if (fTestMode & kErrorDCS)
1811         {
1812                 Log(fCurrentDetector, "ProcessCurrentDetector - In TESTMODE: Simulating DCS error");
1813                 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
1814                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1815                 delete dcsMap;
1816                 return kFALSE;
1817         } else {
1818
1819                 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
1820
1821                 // Query DCS archive
1822                 Int_t nServers = fConfig->GetNServers(fCurrentDetector);
1823                 
1824                 for (int iServ=0; iServ<nServers; iServ++)
1825                 {
1826                 
1827                         TString host(fConfig->GetDCSHost(fCurrentDetector, iServ));
1828                         Int_t port = fConfig->GetDCSPort(fCurrentDetector, iServ);
1829                         Int_t multiSplit = fConfig->GetMultiSplit(fCurrentDetector, iServ);
1830
1831                         Log(fCurrentDetector, Form("ProcessCurrentDetector -"
1832                                         " Querying DCS Amanda server %s:%d (%d of %d)", 
1833                                         host.Data(), port, iServ+1, nServers));
1834                         
1835                         TMap* aliasMap = 0;
1836                         TMap* dpMap = 0;
1837         
1838                         if (fConfig->GetDCSAliases(fCurrentDetector, iServ)->GetEntries() > 0)
1839                         {
1840                                 aliasMap = GetValueSet(host, port, 
1841                                                 fConfig->GetDCSAliases(fCurrentDetector, iServ), 
1842                                                 kAlias, multiSplit);
1843                                 if (!aliasMap)
1844                                 {
1845                                         Log(fCurrentDetector, 
1846                                                 Form("ProcessCurrentDetector -"
1847                                                         " Error retrieving DCS aliases from server %s."
1848                                                         " Sending mail to DCS experts!", host.Data()));
1849                                         UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1850                                         
1851                                         if (!SendMailToDCS())
1852                                                 Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!"));
1853
1854                                         delete dcsMap;
1855                                         return kFALSE;
1856                                 }
1857                         }
1858                         
1859                         if (fConfig->GetDCSDataPoints(fCurrentDetector, iServ)->GetEntries() > 0)
1860                         {
1861                                 dpMap = GetValueSet(host, port, 
1862                                                 fConfig->GetDCSDataPoints(fCurrentDetector, iServ), 
1863                                                 kDP, multiSplit);
1864                                 if (!dpMap)
1865                                 {
1866                                         Log(fCurrentDetector, 
1867                                                 Form("ProcessCurrentDetector -"
1868                                                         " Error retrieving DCS data points from server %s."
1869                                                         " Sending mail to DCS experts!", host.Data()));
1870                                         UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1871                                         
1872                                         if (!SendMailToDCS())
1873                                                 Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!"));
1874                                         
1875                                         if (aliasMap) delete aliasMap;
1876                                         delete dcsMap;
1877                                         return kFALSE;
1878                                 }                               
1879                         }
1880                         
1881                         // merge aliasMap and dpMap into dcsMap
1882                         if(aliasMap) {
1883                                 TIter iter(aliasMap);
1884                                 TObjString* key = 0;
1885                                 while ((key = (TObjString*) iter.Next()))
1886                                         dcsMap->Add(key, aliasMap->GetValue(key->String()));
1887                                 
1888                                 aliasMap->SetOwner(kFALSE);
1889                                 delete aliasMap;
1890                         }       
1891                         
1892                         if(dpMap) {
1893                                 TIter iter(dpMap);
1894                                 TObjString* key = 0;
1895                                 while ((key = (TObjString*) iter.Next()))
1896                                         dcsMap->Add(key, dpMap->GetValue(key->String()));
1897                                 
1898                                 dpMap->SetOwner(kFALSE);
1899                                 delete dpMap;
1900                         }
1901                 }
1902         }
1903         
1904         // save map into file, to help debugging in case of preprocessor error
1905         TFile* f = TFile::Open("DCSMap.root","recreate");
1906         f->cd();
1907         dcsMap->Write("DCSMap", TObject::kSingleKey);
1908         f->Close();
1909         delete f;
1910         
1911         // DCS Archive DB processing successful. Call Preprocessor!
1912         UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
1913
1914         UInt_t returnValue = aPreprocessor->Process(dcsMap);
1915
1916         if (returnValue > 0) // Preprocessor error!
1917         {
1918                 Log(fCurrentDetector, Form("ProcessCurrentDetector - "
1919                                 "Preprocessor failed. Process returned %d.", returnValue));
1920                 UpdateShuttleStatus(AliShuttleStatus::kPPError);
1921                 dcsMap->DeleteAll();
1922                 delete dcsMap;
1923                 return kFALSE;
1924         }
1925         
1926         // preprocessor ok!
1927         UpdateShuttleStatus(AliShuttleStatus::kPPDone);
1928         Log(fCurrentDetector, Form("ProcessCurrentDetector - %s preprocessor returned success",
1929                                 fCurrentDetector.Data()));
1930
1931         dcsMap->DeleteAll();
1932         delete dcsMap;
1933
1934         return kTRUE;
1935 }
1936
1937 //______________________________________________________________________________________________
1938 Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
1939                 TObjArray& entries)
1940 {
1941         // Query DAQ's Shuttle logbook and fills detector status object.
1942         // Call QueryRunParameters to query DAQ logbook for run parameters.
1943         //
1944
1945         entries.SetOwner(1);
1946
1947         // check connection, in case connect
1948         if(!Connect(3)) return kFALSE;
1949
1950         TString sqlQuery;
1951         sqlQuery = Form("select * from %s %s order by run", fConfig->GetShuttlelbTable(), whereClause);
1952
1953         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
1954         if (!aResult) {
1955                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
1956                 return kFALSE;
1957         }
1958
1959         AliDebug(2,Form("Query = %s", sqlQuery.Data()));
1960
1961         if(aResult->GetRowCount() == 0) {
1962                 Log("SHUTTLE", "No entries in Shuttle Logbook match request");
1963                 delete aResult;
1964                 return kTRUE;
1965         }
1966
1967         // TODO Check field count!
1968         const UInt_t nCols = 23;
1969         if (aResult->GetFieldCount() != (Int_t) nCols) {
1970                 Log("SHUTTLE", "Invalid SQL result field number!");
1971                 delete aResult;
1972                 return kFALSE;
1973         }
1974
1975         TSQLRow* aRow;
1976         while ((aRow = aResult->Next())) {
1977                 TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
1978                 Int_t run = runString.Atoi();
1979
1980                 AliShuttleLogbookEntry *entry = QueryRunParameters(run);
1981                 if (!entry)
1982                         continue;
1983
1984                 // loop on detectors
1985                 for(UInt_t ii = 0; ii < nCols; ii++)
1986                         entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii));
1987
1988                 entries.AddLast(entry);
1989                 delete aRow;
1990         }
1991
1992         delete aResult;
1993         return kTRUE;
1994 }
1995
1996 //______________________________________________________________________________________________
1997 AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
1998 {
1999         //
2000         // Retrieve run parameters written in the DAQ logbook and sets them into AliShuttleLogbookEntry object
2001         //
2002
2003         // check connection, in case connect
2004         if (!Connect(3))
2005                 return 0;
2006
2007         TString sqlQuery;
2008         sqlQuery.Form("select * from %s where run=%d", fConfig->GetDAQlbTable(), run);
2009
2010         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
2011         if (!aResult) {
2012                 Log("SHUTTLE", Form("Can't execute query <%s>!", sqlQuery.Data()));
2013                 return 0;
2014         }
2015
2016         if (aResult->GetRowCount() == 0) {
2017                 Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run));
2018                 delete aResult;
2019                 return 0;
2020         }
2021
2022         if (aResult->GetRowCount() > 1) {
2023                 Log("SHUTTLE", Form("QueryRunParameters - UNEXPECTED: "
2024                                 "more than one entry in DAQ Logbook for run %d!", run));
2025                 delete aResult;
2026                 return 0;
2027         }
2028
2029         TSQLRow* aRow = aResult->Next();
2030         if (!aRow)
2031         {
2032                 Log("SHUTTLE", Form("QueryRunParameters - Could not retrieve row for run %d. Skipping", run));
2033                 delete aResult;
2034                 return 0;
2035         }
2036
2037         AliShuttleLogbookEntry* entry = new AliShuttleLogbookEntry(run);
2038
2039         for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
2040                 entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
2041
2042         UInt_t startTime = entry->GetStartTime();
2043         UInt_t endTime = entry->GetEndTime();
2044
2045 //      if (!startTime || !endTime || startTime > endTime) 
2046 //      {
2047 //              Log("SHUTTLE",
2048 //                      Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d. Skipping!",
2049 //                              run, startTime, endTime));              
2050 //              
2051 //              Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
2052 //              fLogbookEntry = entry;  
2053 //              if (!UpdateShuttleLogbook("shuttle_done"))
2054 //              {
2055 //                      AliError(Form("Could not update logbook for run %d !", run));
2056 //              }
2057 //              fLogbookEntry = 0;
2058 //                              
2059 //              delete entry;
2060 //              delete aRow;
2061 //              delete aResult;
2062 //              return 0;
2063 //      }
2064
2065         if (!startTime) 
2066         {
2067                 Log("SHUTTLE",
2068                         Form("QueryRunParameters - Invalid parameters for Run %d: " 
2069                                 "startTime = %d, endTime = %d. Skipping!",
2070                                         run, startTime, endTime));              
2071                 
2072                 Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
2073                 fLogbookEntry = entry;  
2074                 if (!UpdateShuttleLogbook("shuttle_ignored"))
2075                 {
2076                         AliError(Form("Could not update logbook for run %d !", run));
2077                 }
2078                 fLogbookEntry = 0;
2079                                 
2080                 delete entry;
2081                 delete aRow;
2082                 delete aResult;
2083                 return 0;
2084         }
2085         
2086         if (startTime && !endTime) 
2087         {
2088                 // TODO Here we don't mark SHUTTLE done, because this may mean 
2089                 //the run is still ongoing!!            
2090                 Log("SHUTTLE",
2091                         Form("QueryRunParameters - Invalid parameters for Run %d: "
2092                              "startTime = %d, endTime = %d. Skipping (Shuttle won't be marked as DONE)!",
2093                                         run, startTime, endTime));              
2094                 
2095                 //Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
2096                 //fLogbookEntry = entry;        
2097                 //if (!UpdateShuttleLogbook("shuttle_done"))
2098                 //{
2099                 //      AliError(Form("Could not update logbook for run %d !", run));
2100                 //}
2101                 //fLogbookEntry = 0;
2102                                 
2103                 delete entry;
2104                 delete aRow;
2105                 delete aResult;
2106                 return 0;
2107         }
2108                         
2109         if (startTime && endTime && (startTime > endTime)) 
2110         {
2111                 Log("SHUTTLE",
2112                         Form("QueryRunParameters - Invalid parameters for Run %d: "
2113                                 "startTime = %d, endTime = %d. Skipping!",
2114                                         run, startTime, endTime));              
2115                 
2116                 Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
2117                 fLogbookEntry = entry;  
2118                 if (!UpdateShuttleLogbook("shuttle_ignored"))
2119                 {
2120                         AliError(Form("Could not update logbook for run %d !", run));
2121                 }
2122                 fLogbookEntry = 0;
2123                                 
2124                 delete entry;
2125                 delete aRow;
2126                 delete aResult;
2127                 return 0;
2128         }
2129                         
2130         TString totEventsStr = entry->GetRunParameter("totalEvents");  
2131         Int_t totEvents = totEventsStr.Atoi();
2132         if (totEvents < 1) 
2133         {
2134                 Log("SHUTTLE",
2135                         Form("QueryRunParameters - Run %d has 0 events - Skipping!", run));             
2136                 
2137                 Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));           
2138                 fLogbookEntry = entry;  
2139                 if (!UpdateShuttleLogbook("shuttle_ignored"))
2140                 {
2141                         AliError(Form("Could not update logbook for run %d !", run));
2142                 }
2143                 fLogbookEntry = 0;
2144                                 
2145                 delete entry;
2146                 delete aRow;
2147                 delete aResult;
2148                 return 0;
2149         }
2150
2151         delete aRow;
2152         delete aResult;
2153
2154         return entry;
2155 }
2156
2157 //______________________________________________________________________________________________
2158 TMap* AliShuttle::GetValueSet(const char* host, Int_t port, const TSeqCollection* entries,
2159                               DCSType type, Int_t multiSplit)
2160 {
2161         // Retrieve all "entry" data points from the DCS server
2162         // host, port: TSocket connection parameters
2163         // entries: list of name of the alias or data point
2164         // type: kAlias or kDP
2165         // returns TMap of values, 0 when failure
2166         
2167         AliDCSClient client(host, port, fTimeout, fRetries, multiSplit);
2168
2169         TMap* result = 0;
2170         if (type == kAlias)
2171         {
2172                 result = client.GetAliasValues(entries, GetCurrentStartTime(), 
2173                         GetCurrentEndTime());
2174         } 
2175         else if (type == kDP)
2176         {
2177                 result = client.GetDPValues(entries, GetCurrentStartTime(), 
2178                         GetCurrentEndTime());
2179         }
2180
2181         if (result == 0)
2182         {
2183                 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get entries! Reason: %s",
2184                         client.GetErrorString(client.GetResultErrorCode())));
2185                 if (client.GetResultErrorCode() == AliDCSClient::fgkServerError)        
2186                         Log(fCurrentDetector.Data(), Form("GetValueSet - Server error code: %s",
2187                                 client.GetServerError().Data()));
2188
2189                 return 0;
2190         }
2191                 
2192         return result;
2193 }
2194
2195 //______________________________________________________________________________________________
2196 const char* AliShuttle::GetFile(Int_t system, const char* detector,
2197                 const char* id, const char* source)
2198 {
2199         // Get calibration file from file exchange servers
2200         // First queris the FXS database for the file name, using the run, detector, id and source info
2201         // then calls RetrieveFile(filename) for actual copy to local disk
2202         // run: current run being processed (given by Logbook entry fLogbookEntry)
2203         // detector: the Preprocessor name
2204         // id: provided as a parameter by the Preprocessor
2205         // source: provided by the Preprocessor through GetFileSources function
2206
2207         // check if test mode should simulate a FXS error
2208         if (fTestMode & kErrorFXSFiles)
2209         {
2210                 Log(detector, Form("GetFile - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2211                 return 0;
2212         }
2213         
2214         // check connection, in case connect
2215         if (!Connect(system))
2216         {
2217                 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
2218                 return 0;
2219         }
2220
2221         // Query preparation
2222         TString sourceName(source);
2223         Int_t nFields = 3;
2224         TString sqlQueryStart = Form("select filePath,size,fileChecksum from %s where",
2225                                                                 fConfig->GetFXSdbTable(system));
2226         TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
2227                                                                 GetCurrentRun(), detector, id);
2228
2229         if (system == kDAQ)
2230         {
2231                 whereClause += Form(" and DAQsource=\"%s\"", source);
2232         }
2233         else if (system == kDCS)
2234         {
2235                 sourceName="none";
2236         }
2237         else if (system == kHLT)
2238         {
2239                 whereClause += Form(" and DDLnumbers=\"%s\"", source);
2240                 nFields = 3;
2241         }
2242
2243         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2244
2245         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2246
2247         // Query execution
2248         TSQLResult* aResult = 0;
2249         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2250         if (!aResult) {
2251                 Log(detector, Form("GetFileName - Can't execute SQL query to %s database for: id = %s, source = %s",
2252                                 GetSystemName(system), id, sourceName.Data()));
2253                 return 0;
2254         }
2255
2256         if(aResult->GetRowCount() == 0)
2257         {
2258                 Log(detector,
2259                         Form("GetFileName - No entry in %s FXS db for: id = %s, source = %s",
2260                                 GetSystemName(system), id, sourceName.Data()));
2261                 delete aResult;
2262                 return 0;
2263         }
2264
2265         if (aResult->GetRowCount() > 1) {
2266                 Log(detector,
2267                         Form("GetFileName - More than one entry in %s FXS db for: id = %s, source = %s",
2268                                 GetSystemName(system), id, sourceName.Data()));
2269                 delete aResult;
2270                 return 0;
2271         }
2272
2273         if (aResult->GetFieldCount() != nFields) {
2274                 Log(detector,
2275                         Form("GetFileName - Wrong field count in %s FXS db for: id = %s, source = %s",
2276                                 GetSystemName(system), id, sourceName.Data()));
2277                 delete aResult;
2278                 return 0;
2279         }
2280
2281         TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
2282
2283         if (!aRow){
2284                 Log(detector, Form("GetFileName - Empty set result in %s FXS db from query: id = %s, source = %s",
2285                                 GetSystemName(system), id, sourceName.Data()));
2286                 delete aResult;
2287                 return 0;
2288         }
2289
2290         TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
2291         TString fileSize(aRow->GetField(1), aRow->GetFieldLength(1));
2292         TString fileChecksum(aRow->GetField(2), aRow->GetFieldLength(2));
2293
2294         delete aResult;
2295         delete aRow;
2296
2297         AliDebug(2, Form("filePath = %s; size = %s, fileChecksum = %s",
2298                                 filePath.Data(), fileSize.Data(), fileChecksum.Data()));
2299
2300         // retrieved file is renamed to make it unique
2301         TString localFileName = Form("%s/%s_%d_process/%s_%s_%d_%s_%s.shuttle",
2302                                         GetShuttleTempDir(), detector, GetCurrentRun(),
2303                                         GetSystemName(system), detector, GetCurrentRun(), 
2304                                         id, sourceName.Data());
2305
2306
2307         // file retrieval from FXS
2308         UInt_t nRetries = 0;
2309         UInt_t maxRetries = 3;
2310         Bool_t result = kFALSE;
2311
2312         // copy!! if successful TSystem::Exec returns 0
2313         while(nRetries++ < maxRetries) {
2314                 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
2315                 result = RetrieveFile(system, filePath.Data(), localFileName.Data());
2316                 if(!result)
2317                 {
2318                         Log(detector, Form("GetFileName - Copy of file %s from %s FXS failed",
2319                                         filePath.Data(), GetSystemName(system)));
2320                         continue;
2321                 } 
2322
2323                 if (fileChecksum.Length()>0)
2324                 {
2325                         // compare md5sum of local file with the one stored in the FXS DB
2326                         Int_t md5Comp = gSystem->Exec(Form("md5sum %s |grep %s 2>&1 > /dev/null",
2327                                                 localFileName.Data(), fileChecksum.Data()));
2328
2329                         if (md5Comp != 0)
2330                         {
2331                                 Log(detector, Form("GetFileName - md5sum of file %s does not match with local copy!",
2332                                                         filePath.Data()));
2333                                 result = kFALSE;
2334                                 continue;
2335                         }
2336                 } else {
2337                         Log(fCurrentDetector, Form("GetFile - md5sum of file %s not set in %s database, skipping comparison",
2338                                                         filePath.Data(), GetSystemName(system)));
2339                 }
2340                 if (result) break;
2341         }
2342
2343         if(!result) return 0;
2344
2345         fFXSCalled[system]=kTRUE;
2346         TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, sourceName.Data()));
2347         fFXSlist[system].Add(fileParams);
2348
2349         static TString staticLocalFileName;
2350         staticLocalFileName.Form("%s", localFileName.Data());
2351         
2352         Log(fCurrentDetector, Form("GetFile - Retrieved file with id %s and "
2353                         "source %s from %s to %s", id, source, 
2354                         GetSystemName(system), localFileName.Data()));
2355                         
2356         return staticLocalFileName.Data();
2357 }
2358
2359 //______________________________________________________________________________________________
2360 Bool_t AliShuttle::RetrieveFile(UInt_t system, const char* fxsFileName, const char* localFileName)
2361 {
2362         //
2363         // Copies file from FXS to local Shuttle machine
2364         //
2365
2366         // check temp directory: trying to cd to temp; if it does not exist, create it
2367         AliDebug(2, Form("Copy file %s from %s FXS into %s",
2368                         GetSystemName(system), fxsFileName, localFileName));
2369                         
2370         TString tmpDir(localFileName);
2371         
2372         tmpDir = tmpDir(0,tmpDir.Last('/'));
2373
2374         Int_t noDir = gSystem->GetPathInfo(tmpDir.Data(), 0, (Long64_t*) 0, 0, 0);
2375         if (noDir) // temp dir does not exists!
2376         {
2377                 if (gSystem->mkdir(tmpDir.Data(), 1))
2378                 {
2379                         Log(fCurrentDetector.Data(), "RetrieveFile - could not make temp directory!!");
2380                         return kFALSE;
2381                 }
2382         }
2383
2384         TString baseFXSFolder;
2385         if (system == kDAQ)
2386         {
2387                 baseFXSFolder = "FES/";
2388         }
2389         else if (system == kDCS)
2390         {
2391                 baseFXSFolder = "";
2392         }
2393         else if (system == kHLT)
2394         {
2395                 baseFXSFolder = "/opt/FXS/";
2396         }
2397
2398
2399         TString command = Form("scp -oPort=%d -2 %s@%s:%s%s %s",
2400                 fConfig->GetFXSPort(system),
2401                 fConfig->GetFXSUser(system),
2402                 fConfig->GetFXSHost(system),
2403                 baseFXSFolder.Data(),
2404                 fxsFileName,
2405                 localFileName);
2406
2407         AliDebug(2, Form("%s",command.Data()));
2408
2409         Bool_t result = (gSystem->Exec(command.Data()) == 0);
2410
2411         return result;
2412 }
2413
2414 //______________________________________________________________________________________________
2415 TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
2416 {
2417         //
2418         // Get sources producing the condition file Id from file exchange servers
2419         // if id is NULL all sources are returned (distinct)
2420         //
2421
2422         Log(detector, Form("GetFileSources - Retrieving sources with id %s from %s", id, GetSystemName(system)));
2423         
2424         // check if test mode should simulate a FXS error
2425         if (fTestMode & kErrorFXSSources)
2426         {
2427                 Log(detector, Form("GetFileSources - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2428                 return 0;
2429         }
2430
2431         if (system == kDCS)
2432         {
2433                 Log(detector, "GetFileSources - WARNING: DCS system has only one source of data!");
2434                 TList *list = new TList();
2435                 list->SetOwner(1);
2436                 list->Add(new TObjString(" "));
2437                 return list;
2438         }
2439
2440         // check connection, in case connect
2441         if (!Connect(system))
2442         {
2443                 Log(detector, Form("GetFileSources - Couldn't connect to %s FXS database", GetSystemName(system)));
2444                 return NULL;
2445         }
2446
2447         TString sourceName = 0;
2448         if (system == kDAQ)
2449         {
2450                 sourceName = "DAQsource";
2451         } else if (system == kHLT)
2452         {
2453                 sourceName = "DDLnumbers";
2454         }
2455
2456         TString sqlQueryStart = Form("select distinct %s from %s where", sourceName.Data(), fConfig->GetFXSdbTable(system));
2457         TString whereClause = Form("run=%d and detector=\"%s\"",
2458                                 GetCurrentRun(), detector);
2459         if (id)
2460                 whereClause += Form(" and fileId=\"%s\"", id);
2461         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2462
2463         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2464
2465         // Query execution
2466         TSQLResult* aResult;
2467         aResult = fServer[system]->Query(sqlQuery);
2468         if (!aResult) {
2469                 Log(detector, Form("GetFileSources - Can't execute SQL query to %s database for id: %s",
2470                                 GetSystemName(system), id));
2471                 return 0;
2472         }
2473
2474         TList *list = new TList();
2475         list->SetOwner(1);
2476         
2477         if (aResult->GetRowCount() == 0)
2478         {
2479                 Log(detector,
2480                         Form("GetFileSources - No entry in %s FXS table for id: %s", GetSystemName(system), id));
2481                 delete aResult;
2482                 return list;
2483         }
2484
2485         Log(detector, Form("GetFileSources - Found %d sources", aResult->GetRowCount()));
2486
2487         TSQLRow* aRow;
2488         while ((aRow = aResult->Next()))
2489         {
2490
2491                 TString source(aRow->GetField(0), aRow->GetFieldLength(0));
2492                 AliDebug(2, Form("%s = %s", sourceName.Data(), source.Data()));
2493                 list->Add(new TObjString(source));
2494                 delete aRow;
2495         }
2496
2497         delete aResult;
2498
2499         return list;
2500 }
2501
2502 //______________________________________________________________________________________________
2503 TList* AliShuttle::GetFileIDs(Int_t system, const char* detector, const char* source)
2504 {
2505         //
2506         // Get all ids of condition files produced by a given source from file exchange servers
2507         //
2508         
2509         Log(detector, Form("GetFileIDs - Retrieving ids with source %s with %s", source, GetSystemName(system)));
2510
2511         // check if test mode should simulate a FXS error
2512         if (fTestMode & kErrorFXSSources)
2513         {
2514                 Log(detector, Form("GetFileIDs - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2515                 return 0;
2516         }
2517
2518         // check connection, in case connect
2519         if (!Connect(system))
2520         {
2521                 Log(detector, Form("GetFileIDs - Couldn't connect to %s FXS database", GetSystemName(system)));
2522                 return NULL;
2523         }
2524
2525         TString sourceName = 0;
2526         if (system == kDAQ)
2527         {
2528                 sourceName = "DAQsource";
2529         } else if (system == kHLT)
2530         {
2531                 sourceName = "DDLnumbers";
2532         }
2533
2534         TString sqlQueryStart = Form("select fileId from %s where", fConfig->GetFXSdbTable(system));
2535         TString whereClause = Form("run=%d and detector=\"%s\"",
2536                                 GetCurrentRun(), detector);
2537         if (sourceName.Length() > 0 && source)
2538                 whereClause += Form(" and %s=\"%s\"", sourceName.Data(), source);
2539         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2540
2541         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2542
2543         // Query execution
2544         TSQLResult* aResult;
2545         aResult = fServer[system]->Query(sqlQuery);
2546         if (!aResult) {
2547                 Log(detector, Form("GetFileIDs - Can't execute SQL query to %s database for source: %s",
2548                                 GetSystemName(system), source));
2549                 return 0;
2550         }
2551
2552         TList *list = new TList();
2553         list->SetOwner(1);
2554         
2555         if (aResult->GetRowCount() == 0)
2556         {
2557                 Log(detector,
2558                         Form("GetFileIDs - No entry in %s FXS table for source: %s", GetSystemName(system), source));
2559                 delete aResult;
2560                 return list;
2561         }
2562
2563         Log(detector, Form("GetFileIDs - Found %d ids", aResult->GetRowCount()));
2564
2565         TSQLRow* aRow;
2566
2567         while ((aRow = aResult->Next()))
2568         {
2569
2570                 TString id(aRow->GetField(0), aRow->GetFieldLength(0));
2571                 AliDebug(2, Form("fileId = %s", id.Data()));
2572                 list->Add(new TObjString(id));
2573                 delete aRow;
2574         }
2575
2576         delete aResult;
2577
2578         return list;
2579 }
2580
2581 //______________________________________________________________________________________________
2582 Bool_t AliShuttle::Connect(Int_t system)
2583 {
2584         // Connect to MySQL Server of the system's FXS MySQL databases
2585         // DAQ Logbook, Shuttle Logbook and DAQ FXS db are on the same host
2586         //
2587
2588         // check connection: if already connected return
2589         if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
2590
2591         TString dbHost, dbUser, dbPass, dbName;
2592
2593         if (system < 3) // FXS db servers
2594         {
2595                 dbHost = Form("mysql://%s:%d", fConfig->GetFXSdbHost(system), fConfig->GetFXSdbPort(system));
2596                 dbUser = fConfig->GetFXSdbUser(system);
2597                 dbPass = fConfig->GetFXSdbPass(system);
2598                 dbName =   fConfig->GetFXSdbName(system);
2599         } else { // Run & Shuttle logbook servers
2600         // TODO Will the Shuttle logbook server be the same as the Run logbook server ???
2601                 dbHost = Form("mysql://%s:%d", fConfig->GetDAQlbHost(), fConfig->GetDAQlbPort());
2602                 dbUser = fConfig->GetDAQlbUser();
2603                 dbPass = fConfig->GetDAQlbPass();
2604                 dbName =   fConfig->GetDAQlbDB();
2605         }
2606
2607         fServer[system] = TSQLServer::Connect(dbHost.Data(), dbUser.Data(), dbPass.Data());
2608         if (!fServer[system] || !fServer[system]->IsConnected()) {
2609                 if(system < 3)
2610                 {
2611                 AliError(Form("Can't establish connection to FXS database for %s",
2612                                         AliShuttleInterface::GetSystemName(system)));
2613                 } else {
2614                 AliError("Can't establish connection to Run logbook.");
2615                 }
2616                 if(fServer[system]) delete fServer[system];
2617                 return kFALSE;
2618         }
2619
2620         // Get tables
2621         TSQLResult* aResult=0;
2622         switch(system){
2623                 case kDAQ:
2624                         aResult = fServer[kDAQ]->GetTables(dbName.Data());
2625                         break;
2626                 case kDCS:
2627                         aResult = fServer[kDCS]->GetTables(dbName.Data());
2628                         break;
2629                 case kHLT:
2630                         aResult = fServer[kHLT]->GetTables(dbName.Data());
2631                         break;
2632                 default:
2633                         aResult = fServer[3]->GetTables(dbName.Data());
2634                         break;
2635         }
2636
2637         delete aResult;
2638         return kTRUE;
2639 }
2640
2641 //______________________________________________________________________________________________
2642 Bool_t AliShuttle::UpdateTable()
2643 {
2644         //
2645         // Update FXS table filling time_processed field in all rows corresponding to current run and detector
2646         //
2647
2648         Bool_t result = kTRUE;
2649
2650         for (UInt_t system=0; system<3; system++)
2651         {
2652                 if(!fFXSCalled[system]) continue;
2653
2654                 // check connection, in case connect
2655                 if (!Connect(system))
2656                 {
2657                         Log(fCurrentDetector, Form("UpdateTable - Couldn't connect to %s FXS database", GetSystemName(system)));
2658                         result = kFALSE;
2659                         continue;
2660                 }
2661
2662                 TTimeStamp now; // now
2663
2664                 // Loop on FXS list entries
2665                 TIter iter(&fFXSlist[system]);
2666                 TObjString *aFXSentry=0;
2667                 while ((aFXSentry = dynamic_cast<TObjString*> (iter.Next())))
2668                 {
2669                         TString aFXSentrystr = aFXSentry->String();
2670                         TObjArray *aFXSarray = aFXSentrystr.Tokenize("#!?!#");
2671                         if (!aFXSarray || aFXSarray->GetEntries() != 2 )
2672                         {
2673                                 Log(fCurrentDetector, Form("UpdateTable - error updating %s FXS entry. Check string: <%s>",
2674                                         GetSystemName(system), aFXSentrystr.Data()));
2675                                 if(aFXSarray) delete aFXSarray;
2676                                 result = kFALSE;
2677                                 continue;
2678                         }
2679                         const char* fileId = ((TObjString*) aFXSarray->At(0))->GetName();
2680                         const char* source = ((TObjString*) aFXSarray->At(1))->GetName();
2681
2682                         TString whereClause;
2683                         if (system == kDAQ)
2684                         {
2685                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
2686                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
2687                         }
2688                         else if (system == kDCS)
2689                         {
2690                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\";",
2691                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId);
2692                         }
2693                         else if (system == kHLT)
2694                         {
2695                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\";",
2696                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
2697                         }
2698
2699                         delete aFXSarray;
2700
2701                         TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
2702                                                                 now.GetSec(), whereClause.Data());
2703
2704                         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2705
2706                         // Query execution
2707                         TSQLResult* aResult;
2708                         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2709                         if (!aResult)
2710                         {
2711                                 Log(fCurrentDetector, Form("UpdateTable - %s db: can't execute SQL query <%s>",
2712                                                                 GetSystemName(system), sqlQuery.Data()));
2713                                 result = kFALSE;
2714                                 continue;
2715                         }
2716                         delete aResult;
2717                 }
2718         }
2719
2720         return result;
2721 }
2722
2723 //______________________________________________________________________________________________
2724 Bool_t AliShuttle::UpdateTableFailCase()
2725 {
2726         // Update FXS table filling time_processed field in all rows corresponding to current run and detector
2727         // this is called in case the preprocessor is declared failed for the current run, because
2728         // the fields are updated only in case of success
2729
2730         Bool_t result = kTRUE;
2731
2732         for (UInt_t system=0; system<3; system++)
2733         {
2734                 // check connection, in case connect
2735                 if (!Connect(system))
2736                 {
2737                         Log(fCurrentDetector, Form("UpdateTableFailCase - Couldn't connect to %s FXS database",
2738                                                         GetSystemName(system)));
2739                         result = kFALSE;
2740                         continue;
2741                 }
2742
2743                 TTimeStamp now; // now
2744
2745                 // Loop on FXS list entries
2746
2747                 TString whereClause = Form("where run=%d and detector=\"%s\";",
2748                                                 GetCurrentRun(), fCurrentDetector.Data());
2749
2750
2751                 TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
2752                                                         now.GetSec(), whereClause.Data());
2753
2754                 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2755
2756                 // Query execution
2757                 TSQLResult* aResult;
2758                 aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2759                 if (!aResult)
2760                 {
2761                         Log(fCurrentDetector, Form("UpdateTableFailCase - %s db: can't execute SQL query <%s>",
2762                                                         GetSystemName(system), sqlQuery.Data()));
2763                         result = kFALSE;
2764                         continue;
2765                 }
2766                 delete aResult;
2767         }
2768
2769         return result;
2770 }
2771
2772 //______________________________________________________________________________________________
2773 Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
2774 {
2775         //
2776         // Update Shuttle logbook filling detector or shuttle_done column
2777         // ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
2778         //
2779
2780         // check connection, in case connect
2781         if(!Connect(3)){
2782                 Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook.");
2783                 return kFALSE;
2784         }
2785
2786         TString detName(detector);
2787         TString setClause;
2788         if (detName == "shuttle_done" || detName == "shuttle_ignored")
2789         {
2790                 setClause = "set shuttle_done=1";
2791
2792                 if (detName == "shuttle_done")
2793                 {
2794                         // Send the information to ML
2795                         TMonaLisaText  mlStatus("SHUTTLE_status", "Done");
2796
2797                         TList mlList;
2798                         mlList.Add(&mlStatus);
2799                 
2800                         TString mlID;
2801                         mlID.Form("%d", GetCurrentRun());
2802                         fMonaLisa->SendParameters(&mlList, mlID);
2803                 }
2804         } else {
2805                 TString statusStr(status);
2806                 if(statusStr.Contains("done", TString::kIgnoreCase) ||
2807                    statusStr.Contains("failed", TString::kIgnoreCase)){
2808                         setClause = Form("set %s=\"%s\"", detector, status);
2809                 } else {
2810                         Log("SHUTTLE",
2811                                 Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s",
2812                                         status, detector));
2813                         return kFALSE;
2814                 }
2815         }
2816
2817         TString whereClause = Form("where run=%d", GetCurrentRun());
2818
2819         TString sqlQuery = Form("update %s %s %s",
2820                                         fConfig->GetShuttlelbTable(), setClause.Data(), whereClause.Data());
2821
2822         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2823
2824         // Query execution
2825         TSQLResult* aResult;
2826         aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
2827         if (!aResult) {
2828                 Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data()));
2829                 return kFALSE;
2830         }
2831         delete aResult;
2832
2833         return kTRUE;
2834 }
2835
2836 //______________________________________________________________________________________________
2837 Int_t AliShuttle::GetCurrentRun() const
2838 {
2839         //
2840         // Get current run from logbook entry
2841         //
2842
2843         return fLogbookEntry ? fLogbookEntry->GetRun() : -1;
2844 }
2845
2846 //______________________________________________________________________________________________
2847 UInt_t AliShuttle::GetCurrentStartTime() const
2848 {
2849         //
2850         // get current start time
2851         //
2852
2853         return fLogbookEntry ? fLogbookEntry->GetStartTime() : 0;
2854 }
2855
2856 //______________________________________________________________________________________________
2857 UInt_t AliShuttle::GetCurrentEndTime() const
2858 {
2859         //
2860         // get current end time from logbook entry
2861         //
2862
2863         return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
2864 }
2865
2866 //______________________________________________________________________________________________
2867 UInt_t AliShuttle::GetCurrentYear() const
2868 {
2869         //
2870         // Get current year from logbook entry
2871         //
2872
2873         if (!fLogbookEntry) return 0;
2874         
2875         TTimeStamp startTime(GetCurrentStartTime());
2876         TString year =  Form("%d",startTime.GetDate());
2877         year = year(0,4);
2878         
2879         return year.Atoi();
2880 }
2881
2882 //______________________________________________________________________________________________
2883 const char* AliShuttle::GetLHCPeriod() const
2884 {
2885         //
2886         // Get current LHC period from logbook entry
2887         //
2888
2889         if (!fLogbookEntry) return 0;
2890                 
2891         return fLogbookEntry->GetRunParameter("LHCperiod");
2892 }
2893
2894 //______________________________________________________________________________________________
2895 void AliShuttle::Log(const char* detector, const char* message)
2896 {
2897         //
2898         // Fill log string with a message
2899         //
2900
2901         TString logRunDir = GetShuttleLogDir();
2902         if (GetCurrentRun() >=0)
2903                 logRunDir += Form("/%d", GetCurrentRun());
2904         
2905         void* dir = gSystem->OpenDirectory(logRunDir.Data());
2906         if (dir == NULL) {
2907                 if (gSystem->mkdir(logRunDir.Data(), kTRUE)) {
2908                         AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
2909                         return;
2910                 }
2911
2912         } else {
2913                 gSystem->FreeDirectory(dir);
2914         }
2915
2916         TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
2917         if (GetCurrentRun() >= 0) 
2918                 toLog += Form("run %d - ", GetCurrentRun());
2919         toLog += Form("%s", message);
2920
2921         AliInfo(toLog.Data());
2922         
2923         // if we redirect the log output already to the file, leave here
2924         if (fOutputRedirected && strcmp(detector, "SHUTTLE") != 0)
2925                 return;
2926
2927         TString fileName = GetLogFileName(detector);
2928         
2929         gSystem->ExpandPathName(fileName);
2930
2931         ofstream logFile;
2932         logFile.open(fileName, ofstream::out | ofstream::app);
2933
2934         if (!logFile.is_open()) {
2935                 AliError(Form("Could not open file %s", fileName.Data()));
2936                 return;
2937         }
2938
2939         logFile << toLog.Data() << "\n";
2940
2941         logFile.close();
2942 }
2943
2944 //______________________________________________________________________________________________
2945 TString AliShuttle::GetLogFileName(const char* detector) const
2946 {
2947         // 
2948         // returns the name of the log file for a given sub detector
2949         //
2950         
2951         TString fileName;
2952         
2953         if (GetCurrentRun() >= 0) 
2954         {
2955                 fileName.Form("%s/%d/%s_%d.log", GetShuttleLogDir(), GetCurrentRun(), 
2956                         detector, GetCurrentRun());
2957         } else {
2958                 fileName.Form("%s/%s.log", GetShuttleLogDir(), detector);
2959         }
2960
2961         return fileName;
2962 }
2963
2964 //______________________________________________________________________________________________
2965 void AliShuttle::SendAlive()
2966 {
2967         // sends alive message to ML
2968         
2969         TMonaLisaText mlStatus("SHUTTLE_status", "Alive");
2970
2971         TList mlList;
2972         mlList.Add(&mlStatus);
2973
2974         fMonaLisa->SendParameters(&mlList, "__PROCESSINGINFO__");
2975 }
2976
2977 //______________________________________________________________________________________________
2978 Bool_t AliShuttle::Collect(Int_t run)
2979 {
2980         //
2981         // Collects conditions data for all UNPROCESSED run written to DAQ LogBook in case of run = -1 (default)
2982         // If a dedicated run is given this run is processed
2983         //
2984         // In operational mode, this is the Shuttle function triggered by the EOR signal.
2985         //
2986
2987         if (run == -1)
2988                 Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs");
2989         else
2990                 Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run));
2991
2992         SetLastAction("Starting");
2993
2994         // create ML instance
2995         if (!fMonaLisa)
2996                 fMonaLisa = new TMonaLisaWriter(fConfig->GetMonitorHost(), fConfig->GetMonitorTable());
2997                 
2998
2999         SendAlive();
3000
3001         TString whereClause("where shuttle_done=0");
3002         if (run != -1)
3003                 whereClause += Form(" and run=%d", run);
3004
3005         TObjArray shuttleLogbookEntries;
3006         if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries))
3007         {
3008                 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
3009                 return kFALSE;
3010         }
3011
3012         if (shuttleLogbookEntries.GetEntries() == 0)
3013         {
3014                 if (run == -1)
3015                         Log("SHUTTLE","Collect - Found no UNPROCESSED runs in Shuttle logbook");
3016                 else
3017                         Log("SHUTTLE", Form("Collect - Run %d is already DONE "
3018                                                 "or it does not exist in Shuttle logbook", run));
3019                 return kTRUE;
3020         }
3021
3022         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
3023                 fFirstUnprocessed[iDet] = kTRUE;
3024
3025         if (run != -1)
3026         {
3027                 // query Shuttle logbook for earlier runs, check if some detectors are unprocessed,
3028                 // flag them into fFirstUnprocessed array
3029                 TString whereClause(Form("where shuttle_done=0 and run < %d", run));
3030                 TObjArray tmpLogbookEntries;
3031                 if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries))
3032                 {
3033                         Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
3034                         return kFALSE;
3035                 }
3036
3037                 TIter iter(&tmpLogbookEntries);
3038                 AliShuttleLogbookEntry* anEntry = 0;
3039                 while ((anEntry = dynamic_cast<AliShuttleLogbookEntry*> (iter.Next())))
3040                 {
3041                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
3042                         {
3043                                 if (anEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
3044                                 {
3045                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
3046                                                         anEntry->GetRun(), GetDetName(iDet)));
3047                                         fFirstUnprocessed[iDet] = kFALSE;
3048                                 }
3049                         }
3050
3051                 }
3052
3053         }
3054
3055         if (!RetrieveConditionsData(shuttleLogbookEntries))
3056         {
3057                 Log("SHUTTLE", "Collect - Process of at least one run failed");
3058                 return kFALSE;
3059         }
3060
3061         Log("SHUTTLE", "Collect - Requested run(s) successfully processed");
3062         return kTRUE;
3063 }
3064
3065 //______________________________________________________________________________________________
3066 Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries)
3067 {
3068         //
3069         // Retrieve conditions data for all runs that aren't processed yet
3070         //
3071
3072         Bool_t hasError = kFALSE;
3073
3074         TIter iter(&dateEntries);
3075         AliShuttleLogbookEntry* anEntry;
3076
3077         while ((anEntry = (AliShuttleLogbookEntry*) iter.Next())){
3078                 if (!Process(anEntry)){
3079                         hasError = kTRUE;
3080                 }
3081
3082                 // clean SHUTTLE temp directory
3083                 //TString filename = Form("%s/*.shuttle", GetShuttleTempDir());
3084                 //RemoveFile(filename.Data());
3085         }
3086