b53b2396250d94e55929e101155dd380981cf9a0
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttle.cxx
1 /**************************************************************************
2  * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
3  *                                                                        *
4  * Author: The ALICE Off-line Project.                                    *
5  * Contributors are mentioned in the code where appropriate.              *
6  *                                                                        *
7  * Permission to use, copy, modify and distribute this software and its   *
8  * documentation strictly for non-commercial purposes is hereby granted   *
9  * without fee, provided that the above copyright notice appears in all   *
10  * copies and that both the copyright notice and this permission notice   *
11  * appear in the supporting documentation. The authors make no claims     *
12  * about the suitability of this software for any purpose. It is          *
13  * provided "as is" without express or implied warranty.                  *
14  **************************************************************************/
15
16 /*
17 $Log$
18 Revision 1.72  2007/12/13 15:44:28  acolla
19 Run type added in mail sent to detector expert (eases understanding)
20
21 Revision 1.71  2007/12/12 14:56:14  jgrosseo
22 sending shuttle_ignore to ML also in case of 0 events
23
24 Revision 1.70  2007/12/12 13:45:35  acolla
25 Monalisa started in Collect() function. Alive message to monitor is sent at each Collect and every minute during preprocessor processing.
26
27 Revision 1.69  2007/12/12 10:06:29  acolla
28 in AliShuttle.cxx: SHUTTLE logbook is updated in case of invalid run times:
29
30 time_start==0 && time_end==0
31
32 logbook is NOT updated if time_start != 0 && time_end == 0, because it may mean that the run is still ongoing.
33
34 Revision 1.68  2007/12/11 10:15:17  acolla
35 Added marking SHUTTLE=DONE for invalid runs
36 (invalid start time or end time) and runs with totalEvents < 1
37
38 Revision 1.67  2007/12/07 19:14:36  acolla
39 in AliShuttleTrigger:
40
41 Added automatic collection of new runs on a regular time basis (settable from the configuration)
42
43 in AliShuttleConfig: new members
44
45 - triggerWait: time to wait for DIM trigger (s) before starting automatic collection of new runs
46 - mode: run mode (test, prod) -> used to build log folder (logs or logs_PROD)
47
48 in AliShuttle:
49
50 - logs now stored in logs/#RUN/DET_#RUN.log
51
52 Revision 1.66  2007/12/05 10:45:19  jgrosseo
53 changed order of arguments to TMonaLisaWriter
54
55 Revision 1.65  2007/11/26 16:58:37  acolla
56 Monalisa configuration added: host and table name
57
58 Revision 1.64  2007/11/13 16:15:47  acolla
59 DCS map is stored in a file in the temp folder where the detector is processed.
60 If the preprocessor fails, the temp folder is not removed. This will help the debugging of the problem.
61
62 Revision 1.63  2007/11/02 10:53:16  acolla
63 Protection added to AliShuttle::CopyFileLocally
64
65 Revision 1.62  2007/10/31 18:23:13  acolla
66 Furter developement on the Shuttle:
67
68 - Shuttle now connects to the Grid as alidaq. The OCDB and Reference folders
69 are now built from /alice/data, e.g.:
70 /alice/data/2007/LHC07a/OCDB
71
72 the year and LHC period are taken from the Shuttle.
73 Raw metadata files are stored by GRP to:
74 /alice/data/2007/LHC07a/<runNb>/Raw/RunMetadata.root
75
76 - Shuttle sends a mail to DCS experts each time DP retrieval fails.
77
78 Revision 1.61  2007/10/30 20:33:51  acolla
79 Improved managing of temporary folders, which weren't correctly handled.
80 Resolved bug introduced in StoreReferenceFile, which caused SPD preprocessor fail.
81
82 Revision 1.60  2007/10/29 18:06:16  acolla
83
84 New function StoreRunMetadataFile added to preprocessor and Shuttle interface
85 This function can be used by GRP only. It stores raw data tags merged file to the
86 raw data folder (e.g. /alice/data/2008/LHC08a/000099999/Raw).
87
88 KNOWN ISSUES:
89
90 1. Shuttle cannot write to /alice/data/ because it belongs to alidaq. Tag file is stored in /alice/simulation/... for the time being.
91 2. Due to a bug in TAlien::Mkdir, the creation of a folder in recursive mode (-p option) does not work. The problem
92 has been corrected in the root package on the Shuttle machine.
93
94 Revision 1.59  2007/10/05 12:40:55  acolla
95
96 Result error code added to AliDCSClient data members (it was "lost" with the new implementation of TMap* GetAliasValues and GetDPValues).
97
98 Revision 1.58  2007/09/28 15:27:40  acolla
99
100 AliDCSClient "multiSplit" option added in the DCS configuration
101 in AliDCSMessage: variable MAX_BODY_SIZE set to 500000
102
103 Revision 1.57  2007/09/27 16:53:13  acolla
104 Detectors can have more than one AMANDA server. SHUTTLE queries the servers sequentially,
105 merges the dcs aliases/DPs in one TMap and sends it to the preprocessor.
106
107 Revision 1.56  2007/09/14 16:46:14  jgrosseo
108 1) Connect and Close are called before and after each query, so one can
109 keep the same AliDCSClient object.
110 2) The splitting of a query is moved to GetDPValues/GetAliasValues.
111 3) Splitting interval can be specified in constructor
112
113 Revision 1.55  2007/08/06 12:26:40  acolla
114 Function Bool_t GetHLTStatus added to preprocessor. It returns the status of HLT
115 read from the run logbook.
116
117 Revision 1.54  2007/07/12 09:51:25  jgrosseo
118 removed duplicated log message in GetFile
119
120 Revision 1.53  2007/07/12 09:26:28  jgrosseo
121 updating hlt fxs base path
122
123 Revision 1.52  2007/07/12 08:06:45  jgrosseo
124 adding log messages in getfile... functions
125 adding not implemented copy constructor in alishuttleconfigholder
126
127 Revision 1.51  2007/07/03 17:24:52  acolla
128 root moved to v5-16-00. TFileMerger->Cp moved to TFile::Cp.
129
130 Revision 1.50  2007/07/02 17:19:32  acolla
131 preprocessor is run in a temp directory that is removed when process is finished.
132
133 Revision 1.49  2007/06/29 10:45:06  acolla
134 Number of columns in MySql Shuttle logbook increased by one (HLT added)
135
136 Revision 1.48  2007/06/21 13:06:19  acolla
137 GetFileSources returns dummy list with 1 source if system=DCS (better than
138 returning error as it was)
139
140 Revision 1.47  2007/06/19 17:28:56  acolla
141 HLT updated; missing map bug removed.
142
143 Revision 1.46  2007/06/09 13:01:09  jgrosseo
144 Switching to retrieval of several DCS DPs at a time (multiDPrequest)
145
146 Revision 1.45  2007/05/30 06:35:20  jgrosseo
147 Adding functionality to the Shuttle/TestShuttle:
148 o) Function to retrieve list of sources from a given system (GetFileSources with id=0)
149 o) Function to retrieve list of IDs for a given source      (GetFileIDs)
150 These functions are needed for dealing with the tag files that are saved for the GRP preprocessor
151 Example code has been added to the TestProcessor in TestShuttle
152
153 Revision 1.44  2007/05/11 16:09:32  acolla
154 Reference files for ITS, MUON and PHOS are now stored in OfflineDetName/OnlineDetName/run_...
155 example: ITS/SPD/100_filename.root
156
157 Revision 1.43  2007/05/10 09:59:51  acolla
158 Various bug fixes in StoreRefFilesToGrid; Cleaning of reference storage before processing detector (CleanReferenceStorage)
159
160 Revision 1.42  2007/05/03 08:01:39  jgrosseo
161 typo in last commit :-(
162
163 Revision 1.41  2007/05/03 08:00:48  jgrosseo
164 fixing log message when pp want to skip dcs value retrieval
165
166 Revision 1.40  2007/04/27 07:06:48  jgrosseo
167 GetFileSources returns empty list in case of no files, but successful query
168 No mails sent in testmode
169
170 Revision 1.39  2007/04/17 12:43:57  acolla
171 Correction in StoreOCDB; change of text in mail to detector expert
172
173 Revision 1.38  2007/04/12 08:26:18  jgrosseo
174 updated comment
175
176 Revision 1.37  2007/04/10 16:53:14  jgrosseo
177 redirecting sub detector stdout, stderr to sub detector log file
178
179 Revision 1.35  2007/04/04 16:26:38  acolla
180 1. Re-organization of function calls in TestPreprocessor to make it more meaningful.
181 2. Added missing dependency in test preprocessors.
182 3. in AliShuttle.cxx: processing time and memory consumption info on a single line.
183
184 Revision 1.34  2007/04/04 10:33:36  jgrosseo
185 1) Storing of files to the Grid is now done _after_ your preprocessors succeeded. This is transparent, which means that you can still use the same functions (Store, StoreReferenceData) to store files to the Grid. However, the Shuttle first stores them locally and transfers them after the preprocessor finished. The return code of these two functions has changed from UInt_t to Bool_t which gives you the success of the storing.
186 In case of an error with the Grid, the Shuttle will retry the storing later, the preprocessor does not need to be run again.
187
188 2) The meaning of the return code of the preprocessor has changed. 0 is now success and any other value means failure. This value is stored in the log and you can use it to keep details about the error condition.
189
190 3) New function StoreReferenceFile to _directly_ store a file (without opening it) to the reference storage.
191
192 4) The memory usage of the preprocessor is monitored. If it exceeds 2 GB it is terminated.
193
194 5) New function AliPreprocessor::ProcessDCS(). If you do not need to have DCS data in all cases, you can skip the processing by implemting this function and returning kFALSE under certain conditions. E.g. if there is a certain run type.
195 If you always need DCS data (like before), you do not need to implement it.
196
197 6) The run type has been added to the monitoring page
198
199 Revision 1.33  2007/04/03 13:56:01  acolla
200 Grid Storage at the end of preprocessing. Added virtual method to disable DCS query according to the
201 run type.
202
203 Revision 1.32  2007/02/28 10:41:56  acolla
204 Run type field added in SHUTTLE framework. Run type is read from "run type" logbook and retrieved by
205 AliPreprocessor::GetRunType() function.
206 Added some ldap definition files.
207
208 Revision 1.30  2007/02/13 11:23:21  acolla
209 Moved getters and setters of Shuttle's main OCDB/Reference, local
210 OCDB/Reference, temp and log folders to AliShuttleInterface
211
212 Revision 1.27  2007/01/30 17:52:42  jgrosseo
213 adding monalisa monitoring
214
215 Revision 1.26  2007/01/23 19:20:03  acolla
216 Removed old ldif files, added TOF, MCH ldif files. Added some options in
217 AliShuttleConfig::Print. Added in Ali Shuttle: SetShuttleTempDir and
218 SetShuttleLogDir
219
220 Revision 1.25  2007/01/15 19:13:52  acolla
221 Moved some AliInfo to AliDebug in SendMail function
222
223 Revision 1.21  2006/12/07 08:51:26  jgrosseo
224 update (alberto):
225 table, db names in ldap configuration
226 added GRP preprocessor
227 DCS data can also be retrieved by data point
228
229 Revision 1.20  2006/11/16 16:16:48  jgrosseo
230 introducing strict run ordering flag
231 removed giving preprocessor name to preprocessor, they have to know their name themselves ;-)
232
233 Revision 1.19  2006/11/06 14:23:04  jgrosseo
234 major update (Alberto)
235 o) reading of run parameters from the logbook
236 o) online offline naming conversion
237 o) standalone DCSclient package
238
239 Revision 1.18  2006/10/20 15:22:59  jgrosseo
240 o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
241 o) Merging Collect, CollectAll, CollectNew function
242 o) Removing implementation of empty copy constructors (declaration still there!)
243
244 Revision 1.17  2006/10/05 16:20:55  jgrosseo
245 adapting to new CDB classes
246
247 Revision 1.16  2006/10/05 15:46:26  jgrosseo
248 applying to the new interface
249
250 Revision 1.15  2006/10/02 16:38:39  jgrosseo
251 update (alberto):
252 fixed memory leaks
253 storing of objects that failed to be stored to the grid before
254 interfacing of shuttle status table in daq system
255
256 Revision 1.14  2006/08/29 09:16:05  jgrosseo
257 small update
258
259 Revision 1.13  2006/08/15 10:50:00  jgrosseo
260 effc++ corrections (alberto)
261
262 Revision 1.12  2006/08/08 14:19:29  jgrosseo
263 Update to shuttle classes (Alberto)
264
265 - Possibility to set the full object's path in the Preprocessor's and
266 Shuttle's  Store functions
267 - Possibility to extend the object's run validity in the same classes
268 ("startValidity" and "validityInfinite" parameters)
269 - Implementation of the StoreReferenceData function to store reference
270 data in a dedicated CDB storage.
271
272 Revision 1.11  2006/07/21 07:37:20  jgrosseo
273 last run is stored after each run
274
275 Revision 1.10  2006/07/20 09:54:40  jgrosseo
276 introducing status management: The processing per subdetector is divided into several steps,
277 after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
278 can keep track of the number of failures and skips further processing after a certain threshold is
279 exceeded. These thresholds can be configured in LDAP.
280
281 Revision 1.9  2006/07/19 10:09:55  jgrosseo
282 new configuration, accesst to DAQ FES (Alberto)
283
284 Revision 1.8  2006/07/11 12:44:36  jgrosseo
285 adding parameters for extended validity range of data produced by preprocessor
286
287 Revision 1.7  2006/07/10 14:37:09  jgrosseo
288 small fix + todo comment
289
290 Revision 1.6  2006/07/10 13:01:41  jgrosseo
291 enhanced storing of last sucessfully processed run (alberto)
292
293 Revision 1.5  2006/07/04 14:59:57  jgrosseo
294 revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
295
296 Revision 1.4  2006/06/12 09:11:16  jgrosseo
297 coding conventions (Alberto)
298
299 Revision 1.3  2006/06/06 14:26:40  jgrosseo
300 o) removed files that were moved to STEER
301 o) shuttle updated to follow the new interface (Alberto)
302
303 Revision 1.2  2006/03/07 07:52:34  hristov
304 New version (B.Yordanov)
305
306 Revision 1.6  2005/11/19 17:19:14  byordano
307 RetrieveDATEEntries and RetrieveConditionsData added
308
309 Revision 1.5  2005/11/19 11:09:27  byordano
310 AliShuttle declaration added
311
312 Revision 1.4  2005/11/17 17:47:34  byordano
313 TList changed to TObjArray
314
315 Revision 1.3  2005/11/17 14:43:23  byordano
316 import to local CVS
317
318 Revision 1.1.1.1  2005/10/28 07:33:58  hristov
319 Initial import as subdirectory in AliRoot
320
321 Revision 1.2  2005/09/13 08:41:15  byordano
322 default startTime endTime added
323
324 Revision 1.4  2005/08/30 09:13:02  byordano
325 some docs added
326
327 Revision 1.3  2005/08/29 21:15:47  byordano
328 some docs added
329
330 */
331
332 //
333 // This class is the main manager for AliShuttle.
334 // It organizes the data retrieval from DCS and calls the
335 // interface methods of AliPreprocessor.
336 // For every detector in AliShuttleConfig (see AliShuttleConfig),
337 // data for its set of aliases is retrieved. If there is a registered
338 // AliPreprocessor for this detector then it will be used
339 // according to the schema (see AliPreprocessor).
340 // If there isn't a registered AliPreprocessor then the retrieved
341 // data is stored automatically to the underlying AliCDBStorage.
342 // The alias name is used for detSpec.
343 //
344
345 #include "AliShuttle.h"
346
347 #include "AliCDBManager.h"
348 #include "AliCDBStorage.h"
349 #include "AliCDBId.h"
350 #include "AliCDBRunRange.h"
351 #include "AliCDBPath.h"
352 #include "AliCDBEntry.h"
353 #include "AliShuttleConfig.h"
354 #include "DCSClient/AliDCSClient.h"
355 #include "AliLog.h"
356 #include "AliPreprocessor.h"
357 #include "AliShuttleStatus.h"
358 #include "AliShuttleLogbookEntry.h"
359
360 #include <TSystem.h>
361 #include <TObject.h>
362 #include <TString.h>
363 #include <TTimeStamp.h>
364 #include <TObjString.h>
365 #include <TSQLServer.h>
366 #include <TSQLResult.h>
367 #include <TSQLRow.h>
368 #include <TMutex.h>
369 #include <TSystemDirectory.h>
370 #include <TSystemFile.h>
371 #include <TFile.h>
372 #include <TGrid.h>
373 #include <TGridResult.h>
374
375 #include <TMonaLisaWriter.h>
376
377 #include <fstream>
378
379 #include <sys/types.h>
380 #include <sys/wait.h>
381
382 ClassImp(AliShuttle)
383
384 //______________________________________________________________________________________________
385 AliShuttle::AliShuttle(const AliShuttleConfig* config,
386                 UInt_t timeout, Int_t retries):
387 fConfig(config),
388 fTimeout(timeout), fRetries(retries),
389 fPreprocessorMap(),
390 fLogbookEntry(0),
391 fCurrentDetector(),
392 fStatusEntry(0),
393 fMonitoringMutex(0),
394 fLastActionTime(0),
395 fLastAction(),
396 fMonaLisa(0),
397 fTestMode(kNone),
398 fReadTestMode(kFALSE),
399 fOutputRedirected(kFALSE)
400 {
401         //
402         // config: AliShuttleConfig used
403         // timeout: timeout used for AliDCSClient connection
404         // retries: the number of retries in case of connection error.
405         //
406
407         if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
408         for(int iSys=0;iSys<4;iSys++) {
409                 fServer[iSys]=0;
410                 if (iSys < 3)
411                         fFXSlist[iSys].SetOwner(kTRUE);
412         }
413         fPreprocessorMap.SetOwner(kTRUE);
414
415         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
416                 fFirstUnprocessed[iDet] = kFALSE;
417
418         fMonitoringMutex = new TMutex();
419 }
420
421 //______________________________________________________________________________________________
422 AliShuttle::~AliShuttle()
423 {
424         //
425         // destructor
426         //
427
428         fPreprocessorMap.DeleteAll();
429         for(int iSys=0;iSys<4;iSys++)
430                 if(fServer[iSys]) {
431                         fServer[iSys]->Close();
432                         delete fServer[iSys];
433                         fServer[iSys] = 0;
434                 }
435
436         if (fStatusEntry){
437                 delete fStatusEntry;
438                 fStatusEntry = 0;
439         }
440         
441         if (fMonitoringMutex) 
442         {
443                 delete fMonitoringMutex;
444                 fMonitoringMutex = 0;
445         }
446 }
447
448 //______________________________________________________________________________________________
449 void AliShuttle::RegisterPreprocessor(AliPreprocessor* preprocessor)
450 {
451         //
452         // Registers new AliPreprocessor.
453         // It uses GetName() for indentificator of the pre processor.
454         // The pre processor is registered it there isn't any other
455         // with the same identificator (GetName()).
456         //
457
458         const char* detName = preprocessor->GetName();
459         if(GetDetPos(detName) < 0)
460                 AliFatal(Form("********** !!!!! Invalid detector name: %s !!!!! **********", detName));
461
462         if (fPreprocessorMap.GetValue(detName)) {
463                 AliWarning(Form("AliPreprocessor %s is already registered!", detName));
464                 return;
465         }
466
467         fPreprocessorMap.Add(new TObjString(detName), preprocessor);
468 }
469 //______________________________________________________________________________________________
470 Bool_t AliShuttle::Store(const AliCDBPath& path, TObject* object,
471                 AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite)
472 {
473         // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for
474         // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored
475         // using this function. Use StoreReferenceData instead!
476         // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
477         // finishes the data are transferred to the main storage (Grid).
478
479         return StoreLocally(fgkLocalCDB, path, object, metaData, validityStart, validityInfinite);
480 }
481
482 //______________________________________________________________________________________________
483 Bool_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData)
484 {
485         // Stores a CDB object in the storage for reference data. This objects will not be available during
486         // offline reconstrunction. Use this function for reference data only!
487         // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
488         // finishes the data are transferred to the main storage (Grid).
489
490         return StoreLocally(fgkLocalRefStorage, path, object, metaData);
491 }
492
493 //______________________________________________________________________________________________
494 Bool_t AliShuttle::StoreLocally(const TString& localUri,
495                         const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
496                         Int_t validityStart, Bool_t validityInfinite)
497 {
498         // Store object temporarily in local storage. Parameters are passed by Store and StoreReferenceData functions.
499         // when the preprocessor finishes the data are transferred to the main storage (Grid).
500         // The parameters are:
501         //   1) Uri of the backup storage (Local)
502         //   2) the object's path.
503         //   3) the object to be stored
504         //   4) the metaData to be associated with the object
505         //   5) the validity start run number w.r.t. the current run,
506         //      if the data is valid only for this run leave the default 0
507         //   6) specifies if the calibration data is valid for infinity (this means until updated),
508         //      typical for calibration runs, the default is kFALSE
509         //
510         // returns 0 if fail, 1 otherwise
511
512         if (fTestMode & kErrorStorage)
513         {
514                 Log(fCurrentDetector, "StoreLocally - In TESTMODE - Simulating error while storing locally");
515                 return kFALSE;
516         }
517         
518         const char* cdbType = (localUri == fgkLocalCDB) ? "CDB" : "Reference";
519
520         Int_t firstRun = GetCurrentRun() - validityStart;
521         if(firstRun < 0) {
522                 AliWarning("First valid run happens to be less than 0! Setting it to 0.");
523                 firstRun=0;
524         }
525
526         Int_t lastRun = -1;
527         if(validityInfinite) {
528                 lastRun = AliCDBRunRange::Infinity();
529         } else {
530                 lastRun = GetCurrentRun();
531         }
532
533         // Version is set to current run, it will be used later to transfer data to Grid
534         AliCDBId id(path, firstRun, lastRun, GetCurrentRun(), -1);
535
536         if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){
537                 TObjString runUsed = Form("%d", GetCurrentRun());
538                 metaData->SetProperty("RunUsed(TObjString)", runUsed.Clone());
539         }
540
541         Bool_t result = kFALSE;
542
543         if (!(AliCDBManager::Instance()->GetStorage(localUri))) {
544                 Log("SHUTTLE", Form("StoreLocally - Cannot activate local %s storage", cdbType));
545         } else {
546                 result = AliCDBManager::Instance()->GetStorage(localUri)
547                                         ->Put(object, id, metaData);
548         }
549
550         if(!result) {
551
552                 Log(fCurrentDetector, Form("StoreLocally - Can't store object <%s>!", id.ToString().Data()));
553         }
554
555         return result;
556 }
557
558 //______________________________________________________________________________________________
559 Bool_t AliShuttle::StoreOCDB()
560 {
561         //
562         // Called when preprocessor ends successfully or when previous storage attempt failed (kStoreError status)
563         // Calls underlying StoreOCDB(const char*) function twice, for OCDB and Reference storage.
564         // Then calls StoreRefFilesToGrid to store reference files. 
565         //
566         
567         if (fTestMode & kErrorGrid)
568         {
569                 Log("SHUTTLE", "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
570                 Log(fCurrentDetector, "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
571                 return kFALSE;
572         }
573         
574         Log("SHUTTLE","StoreOCDB - Storing OCDB data ...");
575         Bool_t resultCDB = StoreOCDB(fgkMainCDB);
576
577         Log("SHUTTLE","StoreOCDB - Storing reference data ...");
578         Bool_t resultRef = StoreOCDB(fgkMainRefStorage);
579         
580         Log("SHUTTLE","StoreOCDB - Storing reference files ...");
581         Bool_t resultRefFiles = CopyFilesToGrid("reference");
582         
583         Bool_t resultMetadata = kTRUE;
584         if(fCurrentDetector == "GRP") 
585         {
586                 Log("StoreOCDB - SHUTTLE","Storing Run Metadata file ...");
587                 resultMetadata = CopyFilesToGrid("metadata");
588         }
589         
590         return resultCDB && resultRef && resultRefFiles && resultMetadata;
591 }
592
593 //______________________________________________________________________________________________
594 Bool_t AliShuttle::StoreOCDB(const TString& gridURI)
595 {
596         //
597         // Called by StoreOCDB(), performs actual storage to the main OCDB and reference storages (Grid)
598         //
599
600         TObjArray* gridIds=0;
601
602         Bool_t result = kTRUE;
603
604         const char* type = 0;
605         TString localURI;
606         if(gridURI == fgkMainCDB) {
607                 type = "OCDB";
608                 localURI = fgkLocalCDB;
609         } else if(gridURI == fgkMainRefStorage) {
610                 type = "reference";
611                 localURI = fgkLocalRefStorage;
612         } else {
613                 AliError(Form("Invalid storage URI: %s", gridURI.Data()));
614                 return kFALSE;
615         }
616
617         AliCDBManager* man = AliCDBManager::Instance();
618
619         AliCDBStorage *gridSto = man->GetStorage(gridURI);
620         if(!gridSto) {
621                 Log("SHUTTLE",
622                         Form("StoreOCDB - cannot activate main %s storage", type));
623                 return kFALSE;
624         }
625
626         gridIds = gridSto->GetQueryCDBList();
627
628         // get objects previously stored in local CDB
629         AliCDBStorage *localSto = man->GetStorage(localURI);
630         if(!localSto) {
631                 Log("SHUTTLE",
632                         Form("StoreOCDB - cannot activate local %s storage", type));
633                 return kFALSE;
634         }
635         AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*");
636         // Local objects were stored with current run as Grid version!
637         TList* localEntries = localSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun());
638         localEntries->SetOwner(1);
639
640         // loop on local stored objects
641         TIter localIter(localEntries);
642         AliCDBEntry *aLocEntry = 0;
643         while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){
644                 aLocEntry->SetOwner(1);
645                 AliCDBId aLocId = aLocEntry->GetId();
646                 aLocEntry->SetVersion(-1);
647                 aLocEntry->SetSubVersion(-1);
648
649                 // If local object is valid up to infinity we store it only if it is
650                 // the first unprocessed run!
651                 if (aLocId.GetLastRun() == AliCDBRunRange::Infinity() &&
652                         !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
653                 {
654                         Log("SHUTTLE", Form("StoreOCDB - %s: object %s has validity infinite but "
655                                                 "there are previous unprocessed runs!",
656                                                 fCurrentDetector.Data(), aLocId.GetPath().Data()));
657                         continue;
658                 }
659
660                 // loop on Grid valid Id's
661                 Bool_t store = kTRUE;
662                 TIter gridIter(gridIds);
663                 AliCDBId* aGridId = 0;
664                 while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
665                         if(aGridId->GetPath() != aLocId.GetPath()) continue;
666                         // skip all objects valid up to infinity
667                         if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
668                         // if we get here, it means there's already some more recent object stored on Grid!
669                         store = kFALSE;
670                         break;
671                 }
672
673                 // If we get here, the file can be stored!
674                 Bool_t storeOk = gridSto->Put(aLocEntry);
675                 if(!store || storeOk){
676
677                         if (!store)
678                         {
679                                 Log(fCurrentDetector.Data(),
680                                         Form("StoreOCDB - A more recent object already exists in %s storage: <%s>",
681                                                 type, aGridId->ToString().Data()));
682                         } else {
683                                 Log("SHUTTLE",
684                                         Form("StoreOCDB - Object <%s> successfully put into %s storage",
685                                                 aLocId.ToString().Data(), type));
686                                 Log(fCurrentDetector.Data(),
687                                         Form("StoreOCDB - Object <%s> successfully put into %s storage",
688                                                 aLocId.ToString().Data(), type));
689                         }
690
691                         // removing local filename...
692                         TString filename;
693                         localSto->IdToFilename(aLocId, filename);
694                         Log("SHUTTLE", Form("StoreOCDB - Removing local file %s", filename.Data()));
695                         RemoveFile(filename.Data());
696                         continue;
697                 } else  {
698                         Log("SHUTTLE",
699                                 Form("StoreOCDB - Grid %s storage of object <%s> failed",
700                                         type, aLocId.ToString().Data()));
701                         Log(fCurrentDetector.Data(),
702                                 Form("StoreOCDB - Grid %s storage of object <%s> failed",
703                                         type, aLocId.ToString().Data()));
704                         result = kFALSE;
705                 }
706         }
707         localEntries->Clear();
708
709         return result;
710 }
711
712 //______________________________________________________________________________________________
713 Bool_t AliShuttle::CleanReferenceStorage(const char* detector)
714 {
715         // clears the directory used to store reference files of a given subdetector
716   
717         AliCDBManager* man = AliCDBManager::Instance();
718         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
719         TString localBaseFolder = sto->GetBaseFolder();
720
721         TString targetDir = GetRefFilePrefix(localBaseFolder.Data(), detector);
722         
723         Log("SHUTTLE", Form("CleanReferenceStorage - Cleaning %s", targetDir.Data()));
724
725         TString begin;
726         begin.Form("%d_", GetCurrentRun());
727         
728         TSystemDirectory* baseDir = new TSystemDirectory("/", targetDir);
729         if (!baseDir)
730                 return kTRUE;
731                 
732         TList* dirList = baseDir->GetListOfFiles();
733         delete baseDir;
734         
735         if (!dirList) return kTRUE;
736                         
737         if (dirList->GetEntries() < 3) 
738         {
739                 delete dirList;
740                 return kTRUE;
741         }
742                                 
743         Int_t nDirs = 0, nDel = 0;
744         TIter dirIter(dirList);
745         TSystemFile* entry = 0;
746
747         Bool_t success = kTRUE;
748         
749         while ((entry = dynamic_cast<TSystemFile*> (dirIter.Next())))
750         {                                       
751                 if (entry->IsDirectory())
752                         continue;
753                 
754                 TString fileName(entry->GetName());
755                 if (!fileName.BeginsWith(begin))
756                         continue;
757                         
758                 nDirs++;
759                                                 
760                 // delete file
761                 Int_t result = gSystem->Unlink(fileName.Data());
762                 
763                 if (result)
764                 {
765                         Log("SHUTTLE", Form("CleanReferenceStorage - Could not delete file %s!", fileName.Data()));
766                         success = kFALSE;
767                 } else {
768                         nDel++;
769                 }
770         }
771
772         if(nDirs > 0)
773                 Log("SHUTTLE", Form("CleanReferenceStorage - %d (over %d) reference files in folder %s were deleted.", 
774                         nDel, nDirs, targetDir.Data()));
775
776                 
777         delete dirList;
778         return success;
779
780
781
782
783
784
785   Int_t result = gSystem->GetPathInfo(targetDir, 0, (Long64_t*) 0, 0, 0);
786   if (result == 0)
787   {
788     // delete directory
789     result = gSystem->Exec(Form("rm -rf %s", targetDir.Data()));
790     if (result != 0)
791     {  
792       Log("SHUTTLE", Form("CleanReferenceStorage - Could not clean directory %s", targetDir.Data()));
793       return kFALSE;
794     }
795   }
796
797   result = gSystem->mkdir(targetDir, kTRUE);
798   if (result != 0)
799   {
800     Log("SHUTTLE", Form("CleanReferenceStorage - Error creating base directory %s", targetDir.Data()));
801     return kFALSE;
802   }
803         
804   return kTRUE;
805 }
806
807 //______________________________________________________________________________________________
808 Bool_t AliShuttle::StoreReferenceFile(const char* detector, const char* localFile, const char* gridFileName)
809 {
810         //
811         // Stores reference file directly (without opening it). This function stores the file locally.
812         //
813         // The file is stored under the following location: 
814         // <base folder of local reference storage>/<DET>/<RUN#>_<gridFileName>
815         // where <gridFileName> is the second parameter given to the function
816         // 
817         
818         if (fTestMode & kErrorStorage)
819         {
820                 Log(fCurrentDetector, "StoreReferenceFile - In TESTMODE - Simulating error while storing locally");
821                 return kFALSE;
822         }
823         
824         AliCDBManager* man = AliCDBManager::Instance();
825         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
826         
827         TString localBaseFolder = sto->GetBaseFolder();
828         
829         TString target = GetRefFilePrefix(localBaseFolder.Data(), detector);    
830         target.Append(Form("/%d_%s", GetCurrentRun(), gridFileName));
831         
832         return CopyFileLocally(localFile, target);
833 }
834
835 //______________________________________________________________________________________________
836 Bool_t AliShuttle::StoreRunMetadataFile(const char* localFile, const char* gridFileName)
837 {
838         //
839         // Stores Run metadata file to the Grid, in the run folder
840         //
841         // Only GRP can call this function.
842         
843         if (fTestMode & kErrorStorage)
844         {
845                 Log(fCurrentDetector, "StoreRunMetaDataFile - In TESTMODE - Simulating error while storing locally");
846                 return kFALSE;
847         }
848         
849         AliCDBManager* man = AliCDBManager::Instance();
850         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
851         
852         TString localBaseFolder = sto->GetBaseFolder();
853         
854         // Build Run level folder
855         // folder = /alice/data/year/lhcPeriod/runNb/Raw
856         
857                 
858         TString lhcPeriod = GetLHCPeriod();     
859         if (lhcPeriod.Length() == 0) 
860         {
861                 Log("SHUTTLE","StoreRunMetaDataFile - LHCPeriod not found in logbook!");
862                 return 0;
863         }
864         
865         TString target = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/Raw/%s", 
866                                 localBaseFolder.Data(), GetCurrentYear(), 
867                                 lhcPeriod.Data(), GetCurrentRun(), gridFileName);
868                                         
869         return CopyFileLocally(localFile, target);
870 }
871
872 //______________________________________________________________________________________________
873 Bool_t AliShuttle::CopyFileLocally(const char* localFile, const TString& target)
874 {
875         //
876         // Stores file locally. Called by StoreReferenceFile and StoreRunMetadataFile
877         // Files are temporarily stored in the local reference storage. When the preprocessor 
878         // finishes, the Shuttle calls CopyFilesToGrid to transfer the files to AliEn 
879         // (in reference or run level folders)
880         //
881         
882         TString targetDir(target(0, target.Last('/')));
883         
884         //try to open base dir folder, if it does not exist
885         void* dir = gSystem->OpenDirectory(targetDir.Data());
886         if (dir == NULL) {
887                 if (gSystem->mkdir(targetDir.Data(), kTRUE)) {
888                         Log("SHUTTLE", Form("StoreFileLocally - Can't open directory <%s>", targetDir.Data()));
889                         return kFALSE;
890                 }
891
892         } else {
893                 gSystem->FreeDirectory(dir);
894         }
895         
896         Int_t result = 0;
897         
898         result = gSystem->GetPathInfo(localFile, 0, (Long64_t*) 0, 0, 0);
899         if (result)
900         {
901                 Log("SHUTTLE", Form("StoreFileLocally - %s does not exist", localFile));
902                 return kFALSE;
903         }
904
905         result = gSystem->GetPathInfo(target, 0, (Long64_t*) 0, 0, 0);
906         if (!result)
907         {
908                 Log("SHUTTLE", Form("StoreFileLocally - target file %s already exist, removing...", target.Data()));
909                 if (gSystem->Unlink(target.Data()))
910                 {
911                         Log("SHUTTLE", Form("StoreFileLocally - Could not remove existing target file %s!", target.Data()));
912                         return kFALSE;
913                 }
914         }       
915         
916         result = gSystem->CopyFile(localFile, target);
917
918         if (result == 0)
919         {
920                 Log("SHUTTLE", Form("StoreFileLocally - File %s stored locally to %s", localFile, target.Data()));
921                 return kTRUE;
922         }
923         else
924         {
925                 Log("SHUTTLE", Form("StoreFileLocally - Could not store file %s to %s! Error code = %d", 
926                                 localFile, target.Data(), result));
927                 return kFALSE;
928         }       
929
930
931
932 }
933
934 //______________________________________________________________________________________________
935 Bool_t AliShuttle::CopyFilesToGrid(const char* type)
936 {
937         //
938         // Transfers local files to the Grid. Local files can be reference files 
939         // or run metadata file (from GRP only).
940         //
941         // According to the type (ref, metadata) the files are stored under the following location: 
942         // ref --> <base folder of reference storage>/<DET>/<RUN#>_<gridFileName>
943         // metadata --> <run data folder>/<MetadataFileName>
944         //
945                 
946         AliCDBManager* man = AliCDBManager::Instance();
947         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
948         if (!sto)
949                 return kFALSE;
950         TString localBaseFolder = sto->GetBaseFolder();
951         
952         TString dir;
953         TString alienDir;
954         TString begin;
955         
956         if (strcmp(type, "reference") == 0) 
957         {
958                 dir = GetRefFilePrefix(localBaseFolder.Data(), fCurrentDetector.Data());
959                 AliCDBStorage* gridSto = man->GetStorage(fgkMainRefStorage);
960                 if (!gridSto)
961                         return kFALSE;
962                 TString gridBaseFolder = gridSto->GetBaseFolder();
963                 alienDir = GetRefFilePrefix(gridBaseFolder.Data(), fCurrentDetector.Data());
964                 begin = Form("%d_", GetCurrentRun());
965         } 
966         else if (strcmp(type, "metadata") == 0)
967         {
968                         
969                 TString lhcPeriod = GetLHCPeriod();
970         
971                 if (lhcPeriod.Length() == 0) 
972                 {
973                         Log("SHUTTLE","CopyFilesToGrid - LHCPeriod not found in logbook!");
974                         return 0;
975                 }
976                 
977                 dir = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/Raw", 
978                                 localBaseFolder.Data(), GetCurrentYear(), 
979                                 lhcPeriod.Data(), GetCurrentRun());
980                 alienDir = dir(dir.Index("/alice/data/"), dir.Length());
981                 
982                 begin = "";
983         }
984         else 
985         {
986                 Log("SHUTTLE", "CopyFilesToGrid - Unexpected: type label must be reference or metadata!");
987                 return kFALSE;
988         }
989                 
990         TSystemDirectory* baseDir = new TSystemDirectory("/", dir);
991         if (!baseDir)
992                 return kTRUE;
993                 
994         TList* dirList = baseDir->GetListOfFiles();
995         delete baseDir;
996         
997         if (!dirList) return kTRUE;
998                 
999         if (dirList->GetEntries() < 3) 
1000         {
1001                 delete dirList;
1002                 return kTRUE;
1003         }
1004                         
1005         if (!gGrid)
1006         { 
1007                 Log("SHUTTLE", "CopyFilesToGrid - Connection to Grid failed: Cannot continue!");
1008                 delete dirList;
1009                 return kFALSE;
1010         }
1011         
1012         Int_t nDirs = 0, nTransfer = 0;
1013         TIter dirIter(dirList);
1014         TSystemFile* entry = 0;
1015
1016         Bool_t success = kTRUE;
1017         Bool_t first = kTRUE;
1018         
1019         while ((entry = dynamic_cast<TSystemFile*> (dirIter.Next())))
1020         {                       
1021                 if (entry->IsDirectory())
1022                         continue;
1023                         
1024                 TString fileName(entry->GetName());
1025                 if (!fileName.BeginsWith(begin))
1026                         continue;
1027                         
1028                 nDirs++;
1029                         
1030                 if (first)
1031                 {
1032                         first = kFALSE;
1033                         // check that folder exists, otherwise create it
1034                         TGridResult* result = gGrid->Ls(alienDir.Data(), "a");
1035                         
1036                         if (!result)
1037                         {
1038                                 delete dirList;
1039                                 return kFALSE;
1040                         }
1041                         
1042                         if (!result->GetFileName(1)) // TODO: It looks like element 0 is always 0!!
1043                         {
1044                                 // TODO It does not work currently! Bug in TAliEn::Mkdir
1045                                 // TODO Manually fixed in local root v5-16-00
1046                                 if (!gGrid->Mkdir(alienDir.Data(),"-p",0))
1047                                 {
1048                                         Log("SHUTTLE", Form("CopyFilesToGrid - Cannot create directory %s",
1049                                                         alienDir.Data()));
1050                                         delete dirList;
1051                                         return kFALSE;
1052                                 } else {
1053                                         Log("SHUTTLE",Form("CopyFilesToGrid - Folder %s created", alienDir.Data()));
1054                                 }
1055                                 
1056                         } else {
1057                                         Log("SHUTTLE",Form("CopyFilesToGrid - Folder %s found", alienDir.Data()));
1058                         }
1059                 }
1060                         
1061                 TString fullLocalPath;
1062                 fullLocalPath.Form("%s/%s", dir.Data(), fileName.Data());
1063                 
1064                 TString fullGridPath;
1065                 fullGridPath.Form("alien://%s/%s", alienDir.Data(), fileName.Data());
1066
1067                 Bool_t result = TFile::Cp(fullLocalPath, fullGridPath);
1068                 
1069                 if (result)
1070                 {
1071                         Log("SHUTTLE", Form("CopyFilesToGrid - Copying local file %s to %s succeeded!", 
1072                                                 fullLocalPath.Data(), fullGridPath.Data()));
1073                         RemoveFile(fullLocalPath);
1074                         nTransfer++;
1075                 }
1076                 else
1077                 {
1078                         Log("SHUTTLE", Form("CopyFilesToGrid - Copying local file %s to %s FAILED!", 
1079                                                 fullLocalPath.Data(), fullGridPath.Data()));
1080                         success = kFALSE;
1081                 }
1082         }
1083
1084         Log("SHUTTLE", Form("CopyFilesToGrid - %d (over %d) files in folder %s copied to Grid.", 
1085                                                 nTransfer, nDirs, dir.Data()));
1086
1087                 
1088         delete dirList;
1089         return success;
1090 }
1091
1092 //______________________________________________________________________________________________
1093 const char* AliShuttle::GetRefFilePrefix(const char* base, const char* detector)
1094 {
1095         //
1096         // Get folder name of reference files 
1097         //
1098
1099         TString offDetStr(GetOfflineDetName(detector));
1100         TString dir;
1101         if (offDetStr == "ITS" || offDetStr == "MUON" || offDetStr == "PHOS")
1102         {
1103                 dir.Form("%s/%s/%s", base, offDetStr.Data(), detector);
1104         } else {
1105                 dir.Form("%s/%s", base, offDetStr.Data());
1106         }
1107         
1108         return dir.Data();
1109         
1110
1111 }
1112
1113 //______________________________________________________________________________________________
1114 void AliShuttle::CleanLocalStorage(const TString& uri)
1115 {
1116         //
1117         // Called in case the preprocessor is declared failed. Remove remaining objects from the local storages.
1118         //
1119
1120         const char* type = 0;
1121         if(uri == fgkLocalCDB) {
1122                 type = "OCDB";
1123         } else if(uri == fgkLocalRefStorage) {
1124                 type = "Reference";
1125         } else {
1126                 AliError(Form("Invalid storage URI: %s", uri.Data()));
1127                 return;
1128         }
1129
1130         AliCDBManager* man = AliCDBManager::Instance();
1131
1132         // open local storage
1133         AliCDBStorage *localSto = man->GetStorage(uri);
1134         if(!localSto) {
1135                 Log("SHUTTLE",
1136                         Form("CleanLocalStorage - cannot activate local %s storage", type));
1137                 return;
1138         }
1139
1140         TString filename(Form("%s/%s/*/Run*_v%d_s*.root",
1141                 localSto->GetBaseFolder().Data(), GetOfflineDetName(fCurrentDetector.Data()), GetCurrentRun()));
1142
1143         AliDebug(2, Form("filename = %s", filename.Data()));
1144
1145         Log("SHUTTLE", Form("Removing remaining local files for run %d and detector %s ...",
1146                 GetCurrentRun(), fCurrentDetector.Data()));
1147
1148         RemoveFile(filename.Data());
1149
1150 }
1151
1152 //______________________________________________________________________________________________
1153 void AliShuttle::RemoveFile(const char* filename)
1154 {
1155         //
1156         // removes local file
1157         //
1158
1159         TString command(Form("rm -f %s", filename));
1160
1161         Int_t result = gSystem->Exec(command.Data());
1162         if(result != 0)
1163         {
1164                 Log("SHUTTLE", Form("RemoveFile - %s: Cannot remove file %s!",
1165                         fCurrentDetector.Data(), filename));
1166         }
1167 }
1168
1169 //______________________________________________________________________________________________
1170 AliShuttleStatus* AliShuttle::ReadShuttleStatus()
1171 {
1172         //
1173         // Reads the AliShuttleStatus from the CDB
1174         //
1175
1176         if (fStatusEntry){
1177                 delete fStatusEntry;
1178                 fStatusEntry = 0;
1179         }
1180
1181         fStatusEntry = AliCDBManager::Instance()->GetStorage(GetLocalCDB())
1182                 ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
1183
1184         if (!fStatusEntry) return 0;
1185         fStatusEntry->SetOwner(1);
1186
1187         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1188         if (!status) {
1189                 AliError("Invalid object stored to CDB!");
1190                 return 0;
1191         }
1192
1193         return status;
1194 }
1195
1196 //______________________________________________________________________________________________
1197 Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status)
1198 {
1199         //
1200         // writes the status for one subdetector
1201         //
1202
1203         if (fStatusEntry){
1204                 delete fStatusEntry;
1205                 fStatusEntry = 0;
1206         }
1207
1208         Int_t run = GetCurrentRun();
1209
1210         AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
1211
1212         fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
1213         fStatusEntry->SetOwner(1);
1214
1215         UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
1216
1217         if (!result) {
1218                 Log("SHUTTLE", Form("WriteShuttleStatus - Failed for %s, run %d",
1219                                                 fCurrentDetector.Data(), run));
1220                 return kFALSE;
1221         }
1222         
1223         SendMLInfo();
1224
1225         return kTRUE;
1226 }
1227
1228 //______________________________________________________________________________________________
1229 void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount)
1230 {
1231         //
1232         // changes the AliShuttleStatus for the given detector and run to the given status
1233         //
1234
1235         if (!fStatusEntry){
1236                 AliError("UNEXPECTED: fStatusEntry empty");
1237                 return;
1238         }
1239
1240         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1241
1242         if (!status){
1243                 Log("SHUTTLE", "UpdateShuttleStatus - UNEXPECTED: status could not be read from current CDB entry");
1244                 return;
1245         }
1246
1247         TString actionStr = Form("UpdateShuttleStatus - %s: Changing state from %s to %s",
1248                                 fCurrentDetector.Data(),
1249                                 status->GetStatusName(),
1250                                 status->GetStatusName(newStatus));
1251         Log("SHUTTLE", actionStr);
1252         SetLastAction(actionStr);
1253
1254         status->SetStatus(newStatus);
1255         if (increaseCount) status->IncreaseCount();
1256
1257         AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
1258
1259         SendMLInfo();
1260 }
1261
1262 //______________________________________________________________________________________________
1263 void AliShuttle::SendMLInfo()
1264 {
1265         //
1266         // sends ML information about the current status of the current detector being processed
1267         //
1268         
1269         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1270         
1271         if (!status){
1272                 Log("SHUTTLE", "SendMLInfo - UNEXPECTED: status could not be read from current CDB entry");
1273                 return;
1274         }
1275         
1276         TMonaLisaText  mlStatus(Form("%s_status", fCurrentDetector.Data()), status->GetStatusName());
1277         TMonaLisaValue mlRetryCount(Form("%s_count", fCurrentDetector.Data()), status->GetCount());
1278
1279         TList mlList;
1280         mlList.Add(&mlStatus);
1281         mlList.Add(&mlRetryCount);
1282
1283         TString mlID;
1284         mlID.Form("%d", GetCurrentRun());
1285         fMonaLisa->SendParameters(&mlList, mlID);
1286 }
1287
1288 //______________________________________________________________________________________________
1289 Bool_t AliShuttle::ContinueProcessing()
1290 {
1291         // this function reads the AliShuttleStatus information from CDB and
1292         // checks if the processing should be continued
1293         // if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
1294
1295         if (!fConfig->HostProcessDetector(fCurrentDetector)) return kFALSE;
1296
1297         AliPreprocessor* aPreprocessor =
1298                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
1299         if (!aPreprocessor)
1300         {
1301                 Log("SHUTTLE", Form("ContinueProcessing - %s: no preprocessor registered", fCurrentDetector.Data()));
1302                 return kFALSE;
1303         }
1304
1305         AliShuttleLogbookEntry::Status entryStatus =
1306                 fLogbookEntry->GetDetectorStatus(fCurrentDetector);
1307
1308         if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
1309                 Log("SHUTTLE", Form("ContinueProcessing - %s is %s",
1310                                 fCurrentDetector.Data(),
1311                                 fLogbookEntry->GetDetectorStatusName(entryStatus)));
1312                 return kFALSE;
1313         }
1314
1315         // if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state
1316
1317         // check if current run is first unprocessed run for current detector
1318         if (fConfig->StrictRunOrder(fCurrentDetector) &&
1319                 !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
1320         {
1321                 if (fTestMode == kNone)
1322                 {
1323                         Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering"
1324                                         " but this is not the first unprocessed run!"));
1325                         return kFALSE;
1326                 }
1327                 else
1328                 {
1329                         Log("SHUTTLE", Form("ContinueProcessing - In TESTMODE - "
1330                                         "Although %s requires strict run ordering "
1331                                         "and this is not the first unprocessed run, "
1332                                         "the SHUTTLE continues"));
1333                 }
1334         }
1335
1336         AliShuttleStatus* status = ReadShuttleStatus();
1337         if (!status) {
1338                 // first time
1339                 Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
1340                                 fCurrentDetector.Data()));
1341                 status = new AliShuttleStatus(AliShuttleStatus::kStarted);
1342                 return WriteShuttleStatus(status);
1343         }
1344
1345         // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
1346         // If it happens it may mean Logbook updating failed... let's do it now!
1347         if (status->GetStatus() == AliShuttleStatus::kDone ||
1348             status->GetStatus() == AliShuttleStatus::kFailed){
1349                 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
1350                                         fCurrentDetector.Data(),
1351                                         status->GetStatusName(status->GetStatus())));
1352                 UpdateShuttleLogbook(fCurrentDetector.Data(),
1353                                         status->GetStatusName(status->GetStatus()));
1354                 return kFALSE;
1355         }
1356
1357         if (status->GetStatus() == AliShuttleStatus::kStoreError) {
1358                 Log("SHUTTLE",
1359                         Form("ContinueProcessing - %s: Grid storage of one or more "
1360                                 "objects failed. Trying again now",
1361                                 fCurrentDetector.Data()));
1362                 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
1363                 if (StoreOCDB()){
1364                         Log("SHUTTLE", Form("ContinueProcessing - %s: all objects "
1365                                 "successfully stored into main storage",
1366                                 fCurrentDetector.Data()));
1367                 } else {
1368                         Log("SHUTTLE",
1369                                 Form("ContinueProcessing - %s: Grid storage failed again",
1370                                         fCurrentDetector.Data()));
1371                         UpdateShuttleStatus(AliShuttleStatus::kStoreError);
1372                 }
1373                 return kFALSE;
1374         }
1375
1376         // if we get here, there is a restart
1377         Bool_t cont = kFALSE;
1378
1379         // abort conditions
1380         if (status->GetCount() >= fConfig->GetMaxRetries()) {
1381                 Log("SHUTTLE", Form("ContinueProcessing - %s failed %d times in status %s - "
1382                                 "Updating Shuttle Logbook", fCurrentDetector.Data(),
1383                                 status->GetCount(), status->GetStatusName()));
1384                 UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
1385                 UpdateShuttleStatus(AliShuttleStatus::kFailed);
1386
1387                 // there may still be objects in local OCDB and reference storage
1388                 // and FXS databases may be not updated: do it now!
1389                 
1390                 // TODO Currently disabled, we want to keep files in case of failure!
1391                 // CleanLocalStorage(fgkLocalCDB);
1392                 // CleanLocalStorage(fgkLocalRefStorage);
1393                 // UpdateTableFailCase();
1394                 
1395                 // Send mail to detector expert!
1396                 Log("SHUTTLE", Form("ContinueProcessing - Sending mail to %s expert...", 
1397                                         fCurrentDetector.Data()));
1398                 if (!SendMail())
1399                         Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert",
1400                                         fCurrentDetector.Data()));
1401
1402         } else {
1403                 Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. "
1404                                 "Aborted before with %s. Retry number %d.", fCurrentDetector.Data(),
1405                                 status->GetStatusName(), status->GetCount()));
1406                 Bool_t increaseCount = kTRUE;
1407                 if (status->GetStatus() == AliShuttleStatus::kDCSError || 
1408                         status->GetStatus() == AliShuttleStatus::kDCSStarted)
1409                                 increaseCount = kFALSE;
1410                                 
1411                 UpdateShuttleStatus(AliShuttleStatus::kStarted, increaseCount);
1412                 cont = kTRUE;
1413         }
1414
1415         return cont;
1416 }
1417
1418 //______________________________________________________________________________________________
1419 Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
1420 {
1421         //
1422         // Makes data retrieval for all detectors in the configuration.
1423         // entry: Shuttle logbook entry, contains run paramenters and status of detectors
1424         // (Unprocessed, Inactive, Failed or Done).
1425         // Returns kFALSE in case of error occured and kTRUE otherwise
1426         //
1427
1428         if (!entry) return kFALSE;
1429
1430         fLogbookEntry = entry;
1431
1432         Log("SHUTTLE", Form("\t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^*",
1433                                         GetCurrentRun()));
1434
1435         // Send the information to ML
1436         TMonaLisaText  mlStatus("SHUTTLE_status", "Processing");
1437         TMonaLisaText  mlRunType("SHUTTLE_runtype", Form("%s (%s)", entry->GetRunType(), entry->GetRunParameter("log")));
1438
1439         TList mlList;
1440         mlList.Add(&mlStatus);
1441         mlList.Add(&mlRunType);
1442
1443         TString mlID;
1444         mlID.Form("%d", GetCurrentRun());
1445         fMonaLisa->SendParameters(&mlList, mlID);
1446
1447         if (fLogbookEntry->IsDone())
1448         {
1449                 Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook");
1450                 UpdateShuttleLogbook("shuttle_done");
1451                 fLogbookEntry = 0;
1452                 return kTRUE;
1453         }
1454
1455         // read test mode if flag is set
1456         if (fReadTestMode)
1457         {
1458                 fTestMode = kNone;
1459                 TString logEntry(entry->GetRunParameter("log"));
1460                 //printf("log entry = %s\n", logEntry.Data());
1461                 TString searchStr("Testmode: ");
1462                 Int_t pos = logEntry.Index(searchStr.Data());
1463                 //printf("%d\n", pos);
1464                 if (pos >= 0)
1465                 {
1466                         TSubString subStr = logEntry(pos + searchStr.Length(), logEntry.Length());
1467                         //printf("%s\n", subStr.String().Data());
1468                         TString newStr(subStr.Data());
1469                         TObjArray* token = newStr.Tokenize(' ');
1470                         if (token)
1471                         {
1472                                 //token->Print();
1473                                 TObjString* tmpStr = dynamic_cast<TObjString*> (token->First());
1474                                 if (tmpStr)
1475                                 {
1476                                         Int_t testMode = tmpStr->String().Atoi();
1477                                         if (testMode > 0)
1478                                         {
1479                                                 Log("SHUTTLE", Form("Process - Enabling test mode %d", testMode));
1480                                                 SetTestMode((TestMode) testMode);
1481                                         }
1482                                 }
1483                                 delete token;          
1484                         }
1485                 }
1486         }
1487                 
1488         fLogbookEntry->Print("all");
1489
1490         // Initialization
1491         Bool_t hasError = kFALSE;
1492
1493         // Set the CDB and Reference folders according to the year and LHC period
1494         TString lhcPeriod(GetLHCPeriod());
1495         if (lhcPeriod.Length() == 0) 
1496         {
1497                 Log("SHUTTLE","Process - LHCPeriod not found in logbook!");
1498                 return 0; 
1499         }       
1500         
1501         if (fgkMainCDB.Length() == 0)
1502                 fgkMainCDB = Form("alien://folder=/alice/data/%d/%s/OCDB?user=alidaq?cacheFold=/tmp/OCDBCache", 
1503                                         GetCurrentYear(), lhcPeriod.Data());
1504         
1505         if (fgkMainRefStorage.Length() == 0)
1506                 fgkMainRefStorage = Form("alien://folder=/alice/data/%d/%s/Reference?user=alidaq?cacheFold=/tmp/OCDBCache", 
1507                                         GetCurrentYear(), lhcPeriod.Data());
1508         
1509         AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
1510         if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
1511         AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
1512         if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
1513
1514         // Loop on detectors in the configuration
1515         TIter iter(fConfig->GetDetectors());
1516         TObjString* aDetector = 0;
1517
1518         while ((aDetector = (TObjString*) iter.Next()))
1519         {
1520                 fCurrentDetector = aDetector->String();
1521
1522                 if (ContinueProcessing() == kFALSE) continue;
1523
1524                 Log("SHUTTLE", Form("\t\t\t****** run %d - %s: START  ******",
1525                                                 GetCurrentRun(), aDetector->GetName()));
1526
1527                 for(Int_t iSys=0;iSys<3;iSys++) fFXSCalled[iSys]=kFALSE;
1528
1529                 Log(fCurrentDetector.Data(), "Process - Starting processing");
1530
1531                 Int_t pid = fork();
1532
1533                 if (pid < 0)
1534                 {
1535                         Log("SHUTTLE", "Process - ERROR: Forking failed");
1536                 }
1537                 else if (pid > 0)
1538                 {
1539                         // parent
1540                         Log("SHUTTLE", Form("Process - In parent process of %d - %s: Starting monitoring",
1541                                                         GetCurrentRun(), aDetector->GetName()));
1542
1543                         Long_t begin = time(0);
1544
1545                         int status; // to be used with waitpid, on purpose an int (not Int_t)!
1546                         while (waitpid(pid, &status, WNOHANG) == 0)
1547                         {
1548                                 Long_t expiredTime = time(0) - begin;
1549
1550                                 if (expiredTime > fConfig->GetPPTimeOut())
1551                                 {
1552                                         TString tmp;
1553                                         tmp.Form("Process - Process of %s time out. "
1554                                                         "Run time: %d seconds. Killing...",
1555                                                         fCurrentDetector.Data(), expiredTime);
1556                                         Log("SHUTTLE", tmp);
1557                                         Log(fCurrentDetector, tmp);
1558
1559                                         kill(pid, 9);
1560
1561                                         UpdateShuttleStatus(AliShuttleStatus::kPPTimeOut);
1562                                         hasError = kTRUE;
1563
1564                                         gSystem->Sleep(1000);
1565                                 }
1566                                 else
1567                                 {
1568                                         gSystem->Sleep(1000);
1569                                         
1570                                         TString checkStr;
1571                                         checkStr.Form("ps -o vsize --pid %d | tail -n 1", pid);
1572                                         FILE* pipe = gSystem->OpenPipe(checkStr, "r");
1573                                         if (!pipe)
1574                                         {
1575                                                 Log("SHUTTLE", Form("Process - Error: "
1576                                                         "Could not open pipe to %s", checkStr.Data()));
1577                                                 continue;
1578                                         }
1579                                                 
1580                                         char buffer[100];
1581                                         if (!fgets(buffer, 100, pipe))
1582                                         {
1583                                                 Log("SHUTTLE", "Process - Error: ps did not return anything");
1584                                                 gSystem->ClosePipe(pipe);
1585                                                 continue;
1586                                         }
1587                                         gSystem->ClosePipe(pipe);
1588                                         
1589                                         //Log("SHUTTLE", Form("ps returned %s", buffer));
1590                                         
1591                                         Int_t mem = 0;
1592                                         if ((sscanf(buffer, "%d\n", &mem) != 1) || !mem)
1593                                         {
1594                                                 Log("SHUTTLE", "Process - Error: Could not parse output of ps");
1595                                                 continue;
1596                                         }
1597                                         
1598                                         if (expiredTime % 60 == 0)
1599                                         {
1600                                                 Log("SHUTTLE", Form("Process - %s: Checking process. "
1601                                                         "Run time: %d seconds - Memory consumption: %d KB",
1602                                                         fCurrentDetector.Data(), expiredTime, mem));
1603                                                 SendAlive();
1604                                         }
1605                                         
1606                                         if (mem > fConfig->GetPPMaxMem())
1607                                         {
1608                                                 TString tmp;
1609                                                 tmp.Form("Process - Process exceeds maximum allowed memory "
1610                                                         "(%d KB > %d KB). Killing...",
1611                                                         mem, fConfig->GetPPMaxMem());
1612                                                 Log("SHUTTLE", tmp);
1613                                                 Log(fCurrentDetector, tmp);
1614         
1615                                                 kill(pid, 9);
1616         
1617                                                 UpdateShuttleStatus(AliShuttleStatus::kPPOutOfMemory);
1618                                                 hasError = kTRUE;
1619         
1620                                                 gSystem->Sleep(1000);
1621                                         }
1622                                 }
1623                         }
1624
1625                         Log("SHUTTLE", Form("Process - In parent process of %d - %s: Client has terminated.",
1626                                                                 GetCurrentRun(), aDetector->GetName()));
1627
1628                         if (WIFEXITED(status))
1629                         {
1630                                 Int_t returnCode = WEXITSTATUS(status);
1631
1632                                 Log("SHUTTLE", Form("Process - %s: the return code is %d", fCurrentDetector.Data(),
1633                                                                                 returnCode));
1634
1635                                 if (returnCode == 0) hasError = kTRUE;
1636                         }
1637                 }
1638                 else if (pid == 0)
1639                 {
1640                         // client
1641                         Log("SHUTTLE", Form("Process - In client process of %d - %s", GetCurrentRun(),
1642                                 aDetector->GetName()));
1643
1644                         Log("SHUTTLE", Form("Process - Redirecting output to %s log",fCurrentDetector.Data()));
1645
1646                         if ((freopen(GetLogFileName(fCurrentDetector), "a", stdout)) == 0)
1647                         {
1648                                 Log("SHUTTLE", "Process - Could not freopen stdout");
1649                         }
1650                         else
1651                         {
1652                                 fOutputRedirected = kTRUE;
1653                                 if ((dup2(fileno(stdout), fileno(stderr))) < 0)
1654                                         Log("SHUTTLE", "Process - Could not redirect stderr");
1655                                 
1656                         }
1657                         
1658                         TString wd = gSystem->WorkingDirectory();
1659                         TString tmpDir = Form("%s/%s_%d_process", GetShuttleTempDir(), 
1660                                 fCurrentDetector.Data(), GetCurrentRun());
1661                         
1662                         Int_t result = gSystem->GetPathInfo(tmpDir.Data(), 0, (Long64_t*) 0, 0, 0);
1663                         if (!result) // temp dir already exists!
1664                         {
1665                                 Log(fCurrentDetector.Data(), 
1666                                         Form("Process - %s dir already exists! Removing...", tmpDir.Data()));
1667                                 gSystem->Exec(Form("rm -rf %s",tmpDir.Data()));         
1668                         } 
1669                         
1670                         if (gSystem->mkdir(tmpDir.Data(), 1))
1671                         {
1672                                 Log(fCurrentDetector.Data(), "Process - could not make temp directory!!");
1673                                 gSystem->Exit(1);
1674                         }
1675                         
1676                         if (!gSystem->ChangeDirectory(tmpDir.Data())) 
1677                         {
1678                                 Log(fCurrentDetector.Data(), "Process - could not change directory!!");
1679                                 gSystem->Exit(1);                       
1680                         }
1681                         
1682                         Bool_t success = ProcessCurrentDetector();
1683                         
1684                         gSystem->ChangeDirectory(wd.Data());
1685                                                 
1686                         if (success) // Preprocessor finished successfully!
1687                         { 
1688                                 // remove temporary folder
1689                                 gSystem->Exec(Form("rm -rf %s",tmpDir.Data()));
1690                                 
1691                                 // Update time_processed field in FXS DB
1692                                 if (UpdateTable() == kFALSE)
1693                                         Log("SHUTTLE", Form("Process - %s: Could not update FXS databases!", 
1694                                                         fCurrentDetector.Data()));
1695
1696                                 // Transfer the data from local storage to main storage (Grid)
1697                                 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
1698                                 if (StoreOCDB() == kFALSE)
1699                                 {
1700                                         Log("SHUTTLE", 
1701                                                 Form("\t\t\t****** run %d - %s: STORAGE ERROR ******",
1702                                                         GetCurrentRun(), aDetector->GetName()));
1703                                         UpdateShuttleStatus(AliShuttleStatus::kStoreError);
1704                                         success = kFALSE;
1705                                 } else {
1706                                         Log("SHUTTLE", 
1707                                                 Form("\t\t\t****** run %d - %s: DONE ******",
1708                                                         GetCurrentRun(), aDetector->GetName()));
1709                                         UpdateShuttleStatus(AliShuttleStatus::kDone);
1710                                         UpdateShuttleLogbook(fCurrentDetector, "DONE");
1711                                 }
1712                         } else 
1713                         {
1714                                 Log("SHUTTLE", 
1715                                         Form("\t\t\t****** run %d - %s: PP ERROR ******",
1716                                                 GetCurrentRun(), aDetector->GetName()));
1717                         }
1718
1719                         for (UInt_t iSys=0; iSys<3; iSys++)
1720                         {
1721                                 if (fFXSCalled[iSys]) fFXSlist[iSys].Clear();
1722                         }
1723
1724                         Log("SHUTTLE", Form("Process - Client process of %d - %s is exiting now with %d.",
1725                                                         GetCurrentRun(), aDetector->GetName(), success));
1726
1727                         // the client exits here
1728                         gSystem->Exit(success);
1729
1730                         AliError("We should never get here!!!");
1731                 }
1732         }
1733
1734         Log("SHUTTLE", Form("\t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^*",
1735                                                         GetCurrentRun()));
1736
1737         //check if shuttle is done for this run, if so update logbook
1738         TObjArray checkEntryArray;
1739         checkEntryArray.SetOwner(1);
1740         TString whereClause = Form("where run=%d", GetCurrentRun());
1741         if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || 
1742                         checkEntryArray.GetEntries() == 0) {
1743                 Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!",
1744                                                 GetCurrentRun()));
1745                 return hasError == kFALSE;
1746         }
1747
1748         AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*>
1749                                                 (checkEntryArray.At(0));
1750
1751         if (checkEntry)
1752         {
1753                 if (checkEntry->IsDone())
1754                 {
1755                         Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook");
1756                         UpdateShuttleLogbook("shuttle_done");
1757                 }
1758                 else
1759                 {
1760                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1761                         {
1762                                 if (checkEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
1763                                 {
1764                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
1765                                                         checkEntry->GetRun(), GetDetName(iDet)));
1766                                         fFirstUnprocessed[iDet] = kFALSE;
1767                                 }
1768                         }
1769                 }
1770         }
1771
1772         fLogbookEntry = 0;
1773
1774         return hasError == kFALSE;
1775 }
1776
1777 //______________________________________________________________________________________________
1778 Bool_t AliShuttle::ProcessCurrentDetector()
1779 {
1780         //
1781         // Makes data retrieval just for a specific detector (fCurrentDetector).
1782         // Threre should be a configuration for this detector.
1783
1784         Log("SHUTTLE", Form("ProcessCurrentDetector - Retrieving values for %s, run %d", 
1785                                                 fCurrentDetector.Data(), GetCurrentRun()));
1786
1787         TString wd = gSystem->WorkingDirectory();
1788         
1789         if (!CleanReferenceStorage(fCurrentDetector.Data()))
1790                 return kFALSE;
1791         
1792         gSystem->ChangeDirectory(wd.Data());
1793         
1794         TMap* dcsMap = new TMap();
1795
1796         // call preprocessor
1797         AliPreprocessor* aPreprocessor =
1798                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
1799
1800         aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
1801
1802         Bool_t processDCS = aPreprocessor->ProcessDCS();
1803
1804         if (!processDCS)
1805         {
1806                 Log(fCurrentDetector, "ProcessCurrentDetector -"
1807                         " The preprocessor requested to skip the retrieval of DCS values");
1808         }
1809         else if (fTestMode & kSkipDCS)
1810         {
1811                 Log(fCurrentDetector, "ProcessCurrentDetector - In TESTMODE: Skipping DCS processing");
1812         } 
1813         else if (fTestMode & kErrorDCS)
1814         {
1815                 Log(fCurrentDetector, "ProcessCurrentDetector - In TESTMODE: Simulating DCS error");
1816                 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
1817                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1818                 delete dcsMap;
1819                 return kFALSE;
1820         } else {
1821
1822                 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
1823
1824                 // Query DCS archive
1825                 Int_t nServers = fConfig->GetNServers(fCurrentDetector);
1826                 
1827                 for (int iServ=0; iServ<nServers; iServ++)
1828                 {
1829                 
1830                         TString host(fConfig->GetDCSHost(fCurrentDetector, iServ));
1831                         Int_t port = fConfig->GetDCSPort(fCurrentDetector, iServ);
1832                         Int_t multiSplit = fConfig->GetMultiSplit(fCurrentDetector, iServ);
1833
1834                         Log(fCurrentDetector, Form("ProcessCurrentDetector -"
1835                                         " Querying DCS Amanda server %s:%d (%d of %d)", 
1836                                         host.Data(), port, iServ+1, nServers));
1837                         
1838                         TMap* aliasMap = 0;
1839                         TMap* dpMap = 0;
1840         
1841                         if (fConfig->GetDCSAliases(fCurrentDetector, iServ)->GetEntries() > 0)
1842                         {
1843                                 aliasMap = GetValueSet(host, port, 
1844                                                 fConfig->GetDCSAliases(fCurrentDetector, iServ), 
1845                                                 kAlias, multiSplit);
1846                                 if (!aliasMap)
1847                                 {
1848                                         Log(fCurrentDetector, 
1849                                                 Form("ProcessCurrentDetector -"
1850                                                         " Error retrieving DCS aliases from server %s."
1851                                                         " Sending mail to DCS experts!", host.Data()));
1852                                         UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1853                                         
1854                                         //if (!SendMailToDCS())
1855                                         //      Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!"));
1856
1857                                         delete dcsMap;
1858                                         return kFALSE;
1859                                 }
1860                         }
1861                         
1862                         if (fConfig->GetDCSDataPoints(fCurrentDetector, iServ)->GetEntries() > 0)
1863                         {
1864                                 dpMap = GetValueSet(host, port, 
1865                                                 fConfig->GetDCSDataPoints(fCurrentDetector, iServ), 
1866                                                 kDP, multiSplit);
1867                                 if (!dpMap)
1868                                 {
1869                                         Log(fCurrentDetector, 
1870                                                 Form("ProcessCurrentDetector -"
1871                                                         " Error retrieving DCS data points from server %s."
1872                                                         " Sending mail to DCS experts!", host.Data()));
1873                                         UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1874                                         
1875                                         //if (!SendMailToDCS())
1876                                         //      Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!"));
1877                                         
1878                                         if (aliasMap) delete aliasMap;
1879                                         delete dcsMap;
1880                                         return kFALSE;
1881                                 }                               
1882                         }
1883                         
1884                         // merge aliasMap and dpMap into dcsMap
1885                         if(aliasMap) {
1886                                 TIter iter(aliasMap);
1887                                 TObjString* key = 0;
1888                                 while ((key = (TObjString*) iter.Next()))
1889                                         dcsMap->Add(key, aliasMap->GetValue(key->String()));
1890                                 
1891                                 aliasMap->SetOwner(kFALSE);
1892                                 delete aliasMap;
1893                         }       
1894                         
1895                         if(dpMap) {
1896                                 TIter iter(dpMap);
1897                                 TObjString* key = 0;
1898                                 while ((key = (TObjString*) iter.Next()))
1899                                         dcsMap->Add(key, dpMap->GetValue(key->String()));
1900                                 
1901                                 dpMap->SetOwner(kFALSE);
1902                                 delete dpMap;
1903                         }
1904                 }
1905         }
1906         
1907         // save map into file, to help debugging in case of preprocessor error
1908         TFile* f = TFile::Open("DCSMap.root","recreate");
1909         f->cd();
1910         dcsMap->Write("DCSMap", TObject::kSingleKey);
1911         f->Close();
1912         delete f;
1913         
1914         // DCS Archive DB processing successful. Call Preprocessor!
1915         UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
1916
1917         UInt_t returnValue = aPreprocessor->Process(dcsMap);
1918
1919         if (returnValue > 0) // Preprocessor error!
1920         {
1921                 Log(fCurrentDetector, Form("ProcessCurrentDetector - "
1922                                 "Preprocessor failed. Process returned %d.", returnValue));
1923                 UpdateShuttleStatus(AliShuttleStatus::kPPError);
1924                 dcsMap->DeleteAll();
1925                 delete dcsMap;
1926                 return kFALSE;
1927         }
1928         
1929         // preprocessor ok!
1930         UpdateShuttleStatus(AliShuttleStatus::kPPDone);
1931         Log(fCurrentDetector, Form("ProcessCurrentDetector - %s preprocessor returned success",
1932                                 fCurrentDetector.Data()));
1933
1934         dcsMap->DeleteAll();
1935         delete dcsMap;
1936
1937         return kTRUE;
1938 }
1939
1940 //______________________________________________________________________________________________
1941 Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
1942                 TObjArray& entries)
1943 {
1944         // Query DAQ's Shuttle logbook and fills detector status object.
1945         // Call QueryRunParameters to query DAQ logbook for run parameters.
1946         //
1947
1948         entries.SetOwner(1);
1949
1950         // check connection, in case connect
1951         if(!Connect(3)) return kFALSE;
1952
1953         TString sqlQuery;
1954         sqlQuery = Form("select * from %s %s order by run", fConfig->GetShuttlelbTable(), whereClause);
1955
1956         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
1957         if (!aResult) {
1958                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
1959                 return kFALSE;
1960         }
1961
1962         AliDebug(2,Form("Query = %s", sqlQuery.Data()));
1963
1964         if(aResult->GetRowCount() == 0) {
1965                 Log("SHUTTLE", "No entries in Shuttle Logbook match request");
1966                 delete aResult;
1967                 return kTRUE;
1968         }
1969
1970         // TODO Check field count!
1971         const UInt_t nCols = 23;
1972         if (aResult->GetFieldCount() != (Int_t) nCols) {
1973                 Log("SHUTTLE", "Invalid SQL result field number!");
1974                 delete aResult;
1975                 return kFALSE;
1976         }
1977
1978         TSQLRow* aRow;
1979         while ((aRow = aResult->Next())) {
1980                 TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
1981                 Int_t run = runString.Atoi();
1982
1983                 AliShuttleLogbookEntry *entry = QueryRunParameters(run);
1984                 if (!entry)
1985                         continue;
1986
1987                 // loop on detectors
1988                 for(UInt_t ii = 0; ii < nCols; ii++)
1989                         entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii));
1990
1991                 entries.AddLast(entry);
1992                 delete aRow;
1993         }
1994
1995         delete aResult;
1996         return kTRUE;
1997 }
1998
1999 //______________________________________________________________________________________________
2000 AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
2001 {
2002         //
2003         // Retrieve run parameters written in the DAQ logbook and sets them into AliShuttleLogbookEntry object
2004         //
2005
2006         // check connection, in case connect
2007         if (!Connect(3))
2008                 return 0;
2009
2010         TString sqlQuery;
2011         sqlQuery.Form("select * from %s where run=%d", fConfig->GetDAQlbTable(), run);
2012
2013         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
2014         if (!aResult) {
2015                 Log("SHUTTLE", Form("Can't execute query <%s>!", sqlQuery.Data()));
2016                 return 0;
2017         }
2018
2019         if (aResult->GetRowCount() == 0) {
2020                 Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run));
2021                 delete aResult;
2022                 return 0;
2023         }
2024
2025         if (aResult->GetRowCount() > 1) {
2026                 Log("SHUTTLE", Form("QueryRunParameters - UNEXPECTED: "
2027                                 "more than one entry in DAQ Logbook for run %d!", run));
2028                 delete aResult;
2029                 return 0;
2030         }
2031
2032         TSQLRow* aRow = aResult->Next();
2033         if (!aRow)
2034         {
2035                 Log("SHUTTLE", Form("QueryRunParameters - Could not retrieve row for run %d. Skipping", run));
2036                 delete aResult;
2037                 return 0;
2038         }
2039
2040         AliShuttleLogbookEntry* entry = new AliShuttleLogbookEntry(run);
2041
2042         for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
2043                 entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
2044
2045         UInt_t startTime = entry->GetStartTime();
2046         UInt_t endTime = entry->GetEndTime();
2047
2048 //      if (!startTime || !endTime || startTime > endTime) 
2049 //      {
2050 //              Log("SHUTTLE",
2051 //                      Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d. Skipping!",
2052 //                              run, startTime, endTime));              
2053 //              
2054 //              Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
2055 //              fLogbookEntry = entry;  
2056 //              if (!UpdateShuttleLogbook("shuttle_done"))
2057 //              {
2058 //                      AliError(Form("Could not update logbook for run %d !", run));
2059 //              }
2060 //              fLogbookEntry = 0;
2061 //                              
2062 //              delete entry;
2063 //              delete aRow;
2064 //              delete aResult;
2065 //              return 0;
2066 //      }
2067
2068         if (!startTime) 
2069         {
2070                 Log("SHUTTLE",
2071                         Form("QueryRunParameters - Invalid parameters for Run %d: " 
2072                                 "startTime = %d, endTime = %d. Skipping!",
2073                                         run, startTime, endTime));              
2074                 
2075                 Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
2076                 fLogbookEntry = entry;  
2077                 if (!UpdateShuttleLogbook("shuttle_ignored"))
2078                 {
2079                         AliError(Form("Could not update logbook for run %d !", run));
2080                 }
2081                 fLogbookEntry = 0;
2082                                 
2083                 delete entry;
2084                 delete aRow;
2085                 delete aResult;
2086                 return 0;
2087         }
2088         
2089         if (startTime && !endTime) 
2090         {
2091                 // TODO Here we don't mark SHUTTLE done, because this may mean 
2092                 //the run is still ongoing!!            
2093                 Log("SHUTTLE",
2094                         Form("QueryRunParameters - Invalid parameters for Run %d: "
2095                              "startTime = %d, endTime = %d. Skipping (Shuttle won't be marked as DONE)!",
2096                                         run, startTime, endTime));              
2097                 
2098                 //Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
2099                 //fLogbookEntry = entry;        
2100                 //if (!UpdateShuttleLogbook("shuttle_done"))
2101                 //{
2102                 //      AliError(Form("Could not update logbook for run %d !", run));
2103                 //}
2104                 //fLogbookEntry = 0;
2105                                 
2106                 delete entry;
2107                 delete aRow;
2108                 delete aResult;
2109                 return 0;
2110         }
2111                         
2112         if (startTime && endTime && (startTime > endTime)) 
2113         {
2114                 Log("SHUTTLE",
2115                         Form("QueryRunParameters - Invalid parameters for Run %d: "
2116                                 "startTime = %d, endTime = %d. Skipping!",
2117                                         run, startTime, endTime));              
2118                 
2119                 Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
2120                 fLogbookEntry = entry;  
2121                 if (!UpdateShuttleLogbook("shuttle_ignored"))
2122                 {
2123                         AliError(Form("Could not update logbook for run %d !", run));
2124                 }
2125                 fLogbookEntry = 0;
2126                                 
2127                 delete entry;
2128                 delete aRow;
2129                 delete aResult;
2130                 return 0;
2131         }
2132                         
2133         TString totEventsStr = entry->GetRunParameter("totalEvents");  
2134         Int_t totEvents = totEventsStr.Atoi();
2135         if (totEvents < 1) 
2136         {
2137                 Log("SHUTTLE",
2138                         Form("QueryRunParameters - Run %d has 0 events - Skipping!", run));             
2139                 
2140                 Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));           
2141                 fLogbookEntry = entry;  
2142                 if (!UpdateShuttleLogbook("shuttle_ignored"))
2143                 {
2144                         AliError(Form("Could not update logbook for run %d !", run));
2145                 }
2146                 fLogbookEntry = 0;
2147                                 
2148                 delete entry;
2149                 delete aRow;
2150                 delete aResult;
2151                 return 0;
2152         }
2153
2154         delete aRow;
2155         delete aResult;
2156
2157         return entry;
2158 }
2159
2160 //______________________________________________________________________________________________
2161 TMap* AliShuttle::GetValueSet(const char* host, Int_t port, const TSeqCollection* entries,
2162                               DCSType type, Int_t multiSplit)
2163 {
2164         // Retrieve all "entry" data points from the DCS server
2165         // host, port: TSocket connection parameters
2166         // entries: list of name of the alias or data point
2167         // type: kAlias or kDP
2168         // returns TMap of values, 0 when failure
2169         
2170         AliDCSClient client(host, port, fTimeout, fRetries, multiSplit);
2171
2172         TMap* result = 0;
2173         if (type == kAlias)
2174         {
2175                 result = client.GetAliasValues(entries, GetCurrentStartTime(), 
2176                         GetCurrentEndTime());
2177         } 
2178         else if (type == kDP)
2179         {
2180                 result = client.GetDPValues(entries, GetCurrentStartTime(), 
2181                         GetCurrentEndTime());
2182         }
2183
2184         if (result == 0)
2185         {
2186                 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get entries! Reason: %s",
2187                         client.GetErrorString(client.GetResultErrorCode())));
2188                 if (client.GetResultErrorCode() == AliDCSClient::fgkServerError)        
2189                         Log(fCurrentDetector.Data(), Form("GetValueSet - Server error code: %s",
2190                                 client.GetServerError().Data()));
2191
2192                 return 0;
2193         }
2194                 
2195         return result;
2196 }
2197
2198 //______________________________________________________________________________________________
2199 const char* AliShuttle::GetFile(Int_t system, const char* detector,
2200                 const char* id, const char* source)
2201 {
2202         // Get calibration file from file exchange servers
2203         // First queris the FXS database for the file name, using the run, detector, id and source info
2204         // then calls RetrieveFile(filename) for actual copy to local disk
2205         // run: current run being processed (given by Logbook entry fLogbookEntry)
2206         // detector: the Preprocessor name
2207         // id: provided as a parameter by the Preprocessor
2208         // source: provided by the Preprocessor through GetFileSources function
2209
2210         // check if test mode should simulate a FXS error
2211         if (fTestMode & kErrorFXSFiles)
2212         {
2213                 Log(detector, Form("GetFile - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2214                 return 0;
2215         }
2216         
2217         // check connection, in case connect
2218         if (!Connect(system))
2219         {
2220                 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
2221                 return 0;
2222         }
2223
2224         // Query preparation
2225         TString sourceName(source);
2226         Int_t nFields = 3;
2227         TString sqlQueryStart = Form("select filePath,size,fileChecksum from %s where",
2228                                                                 fConfig->GetFXSdbTable(system));
2229         TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
2230                                                                 GetCurrentRun(), detector, id);
2231
2232         if (system == kDAQ)
2233         {
2234                 whereClause += Form(" and DAQsource=\"%s\"", source);
2235         }
2236         else if (system == kDCS)
2237         {
2238                 sourceName="none";
2239         }
2240         else if (system == kHLT)
2241         {
2242                 whereClause += Form(" and DDLnumbers=\"%s\"", source);
2243                 nFields = 3;
2244         }
2245
2246         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2247
2248         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2249
2250         // Query execution
2251         TSQLResult* aResult = 0;
2252         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2253         if (!aResult) {
2254                 Log(detector, Form("GetFileName - Can't execute SQL query to %s database for: id = %s, source = %s",
2255                                 GetSystemName(system), id, sourceName.Data()));
2256                 return 0;
2257         }
2258
2259         if(aResult->GetRowCount() == 0)
2260         {
2261                 Log(detector,
2262                         Form("GetFileName - No entry in %s FXS db for: id = %s, source = %s",
2263                                 GetSystemName(system), id, sourceName.Data()));
2264                 delete aResult;
2265                 return 0;
2266         }
2267
2268         if (aResult->GetRowCount() > 1) {
2269                 Log(detector,
2270                         Form("GetFileName - More than one entry in %s FXS db for: id = %s, source = %s",
2271                                 GetSystemName(system), id, sourceName.Data()));
2272                 delete aResult;
2273                 return 0;
2274         }
2275
2276         if (aResult->GetFieldCount() != nFields) {
2277                 Log(detector,
2278                         Form("GetFileName - Wrong field count in %s FXS db for: id = %s, source = %s",
2279                                 GetSystemName(system), id, sourceName.Data()));
2280                 delete aResult;
2281                 return 0;
2282         }
2283
2284         TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
2285
2286         if (!aRow){
2287                 Log(detector, Form("GetFileName - Empty set result in %s FXS db from query: id = %s, source = %s",
2288                                 GetSystemName(system), id, sourceName.Data()));
2289                 delete aResult;
2290                 return 0;
2291         }
2292
2293         TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
2294         TString fileSize(aRow->GetField(1), aRow->GetFieldLength(1));
2295         TString fileChecksum(aRow->GetField(2), aRow->GetFieldLength(2));
2296
2297         delete aResult;
2298         delete aRow;
2299
2300         AliDebug(2, Form("filePath = %s; size = %s, fileChecksum = %s",
2301                                 filePath.Data(), fileSize.Data(), fileChecksum.Data()));
2302
2303         // retrieved file is renamed to make it unique
2304         TString localFileName = Form("%s/%s_%d_process/%s_%s_%d_%s_%s.shuttle",
2305                                         GetShuttleTempDir(), detector, GetCurrentRun(),
2306                                         GetSystemName(system), detector, GetCurrentRun(), 
2307                                         id, sourceName.Data());
2308
2309
2310         // file retrieval from FXS
2311         UInt_t nRetries = 0;
2312         UInt_t maxRetries = 3;
2313         Bool_t result = kFALSE;
2314
2315         // copy!! if successful TSystem::Exec returns 0
2316         while(nRetries++ < maxRetries) {
2317                 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
2318                 result = RetrieveFile(system, filePath.Data(), localFileName.Data());
2319                 if(!result)
2320                 {
2321                         Log(detector, Form("GetFileName - Copy of file %s from %s FXS failed",
2322                                         filePath.Data(), GetSystemName(system)));
2323                         continue;
2324                 } 
2325
2326                 if (fileChecksum.Length()>0)
2327                 {
2328                         // compare md5sum of local file with the one stored in the FXS DB
2329                         Int_t md5Comp = gSystem->Exec(Form("md5sum %s |grep %s 2>&1 > /dev/null",
2330                                                 localFileName.Data(), fileChecksum.Data()));
2331
2332                         if (md5Comp != 0)
2333                         {
2334                                 Log(detector, Form("GetFileName - md5sum of file %s does not match with local copy!",
2335                                                         filePath.Data()));
2336                                 result = kFALSE;
2337                                 continue;
2338                         }
2339                 } else {
2340                         Log(fCurrentDetector, Form("GetFile - md5sum of file %s not set in %s database, skipping comparison",
2341                                                         filePath.Data(), GetSystemName(system)));
2342                 }
2343                 if (result) break;
2344         }
2345
2346         if(!result) return 0;
2347
2348         fFXSCalled[system]=kTRUE;
2349         TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, sourceName.Data()));
2350         fFXSlist[system].Add(fileParams);
2351
2352         static TString staticLocalFileName;
2353         staticLocalFileName.Form("%s", localFileName.Data());
2354         
2355         Log(fCurrentDetector, Form("GetFile - Retrieved file with id %s and "
2356                         "source %s from %s to %s", id, source, 
2357                         GetSystemName(system), localFileName.Data()));
2358                         
2359         return staticLocalFileName.Data();
2360 }
2361
2362 //______________________________________________________________________________________________
2363 Bool_t AliShuttle::RetrieveFile(UInt_t system, const char* fxsFileName, const char* localFileName)
2364 {
2365         //
2366         // Copies file from FXS to local Shuttle machine
2367         //
2368
2369         // check temp directory: trying to cd to temp; if it does not exist, create it
2370         AliDebug(2, Form("Copy file %s from %s FXS into %s",
2371                         GetSystemName(system), fxsFileName, localFileName));
2372                         
2373         TString tmpDir(localFileName);
2374         
2375         tmpDir = tmpDir(0,tmpDir.Last('/'));
2376
2377         Int_t noDir = gSystem->GetPathInfo(tmpDir.Data(), 0, (Long64_t*) 0, 0, 0);
2378         if (noDir) // temp dir does not exists!
2379         {
2380                 if (gSystem->mkdir(tmpDir.Data(), 1))
2381                 {
2382                         Log(fCurrentDetector.Data(), "RetrieveFile - could not make temp directory!!");
2383                         return kFALSE;
2384                 }
2385         }
2386
2387         TString baseFXSFolder;
2388         if (system == kDAQ)
2389         {
2390                 baseFXSFolder = "FES/";
2391         }
2392         else if (system == kDCS)
2393         {
2394                 baseFXSFolder = "";
2395         }
2396         else if (system == kHLT)
2397         {
2398                 baseFXSFolder = "/opt/FXS/";
2399         }
2400
2401
2402         TString command = Form("scp -oPort=%d -2 %s@%s:%s%s %s",
2403                 fConfig->GetFXSPort(system),
2404                 fConfig->GetFXSUser(system),
2405                 fConfig->GetFXSHost(system),
2406                 baseFXSFolder.Data(),
2407                 fxsFileName,
2408                 localFileName);
2409
2410         AliDebug(2, Form("%s",command.Data()));
2411
2412         Bool_t result = (gSystem->Exec(command.Data()) == 0);
2413
2414         return result;
2415 }
2416
2417 //______________________________________________________________________________________________
2418 TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
2419 {
2420         //
2421         // Get sources producing the condition file Id from file exchange servers
2422         // if id is NULL all sources are returned (distinct)
2423         //
2424
2425         Log(detector, Form("GetFileSources - Retrieving sources with id %s from %s", id, GetSystemName(system)));
2426         
2427         // check if test mode should simulate a FXS error
2428         if (fTestMode & kErrorFXSSources)
2429         {
2430                 Log(detector, Form("GetFileSources - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2431                 return 0;
2432         }
2433
2434         if (system == kDCS)
2435         {
2436                 Log(detector, "GetFileSources - WARNING: DCS system has only one source of data!");
2437                 TList *list = new TList();
2438                 list->SetOwner(1);
2439                 list->Add(new TObjString(" "));
2440                 return list;
2441         }
2442
2443         // check connection, in case connect
2444         if (!Connect(system))
2445         {
2446                 Log(detector, Form("GetFileSources - Couldn't connect to %s FXS database", GetSystemName(system)));
2447                 return NULL;
2448         }
2449
2450         TString sourceName = 0;
2451         if (system == kDAQ)
2452         {
2453                 sourceName = "DAQsource";
2454         } else if (system == kHLT)
2455         {
2456                 sourceName = "DDLnumbers";
2457         }
2458
2459         TString sqlQueryStart = Form("select distinct %s from %s where", sourceName.Data(), fConfig->GetFXSdbTable(system));
2460         TString whereClause = Form("run=%d and detector=\"%s\"",
2461                                 GetCurrentRun(), detector);
2462         if (id)
2463                 whereClause += Form(" and fileId=\"%s\"", id);
2464         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2465
2466         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2467
2468         // Query execution
2469         TSQLResult* aResult;
2470         aResult = fServer[system]->Query(sqlQuery);
2471         if (!aResult) {
2472                 Log(detector, Form("GetFileSources - Can't execute SQL query to %s database for id: %s",
2473                                 GetSystemName(system), id));
2474                 return 0;
2475         }
2476
2477         TList *list = new TList();
2478         list->SetOwner(1);
2479         
2480         if (aResult->GetRowCount() == 0)
2481         {
2482                 Log(detector,
2483                         Form("GetFileSources - No entry in %s FXS table for id: %s", GetSystemName(system), id));
2484                 delete aResult;
2485                 return list;
2486         }
2487
2488         Log(detector, Form("GetFileSources - Found %d sources", aResult->GetRowCount()));
2489
2490         TSQLRow* aRow;
2491         while ((aRow = aResult->Next()))
2492         {
2493
2494                 TString source(aRow->GetField(0), aRow->GetFieldLength(0));
2495                 AliDebug(2, Form("%s = %s", sourceName.Data(), source.Data()));
2496                 list->Add(new TObjString(source));
2497                 delete aRow;
2498         }
2499
2500         delete aResult;
2501
2502         return list;
2503 }
2504
2505 //______________________________________________________________________________________________
2506 TList* AliShuttle::GetFileIDs(Int_t system, const char* detector, const char* source)
2507 {
2508         //
2509         // Get all ids of condition files produced by a given source from file exchange servers
2510         //
2511         
2512         Log(detector, Form("GetFileIDs - Retrieving ids with source %s with %s", source, GetSystemName(system)));
2513
2514         // check if test mode should simulate a FXS error
2515         if (fTestMode & kErrorFXSSources)
2516         {
2517                 Log(detector, Form("GetFileIDs - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2518                 return 0;
2519         }
2520
2521         // check connection, in case connect
2522         if (!Connect(system))
2523         {
2524                 Log(detector, Form("GetFileIDs - Couldn't connect to %s FXS database", GetSystemName(system)));
2525                 return NULL;
2526         }
2527
2528         TString sourceName = 0;
2529         if (system == kDAQ)
2530         {
2531                 sourceName = "DAQsource";
2532         } else if (system == kHLT)
2533         {
2534                 sourceName = "DDLnumbers";
2535         }
2536
2537         TString sqlQueryStart = Form("select fileId from %s where", fConfig->GetFXSdbTable(system));
2538         TString whereClause = Form("run=%d and detector=\"%s\"",
2539                                 GetCurrentRun(), detector);
2540         if (sourceName.Length() > 0 && source)
2541                 whereClause += Form(" and %s=\"%s\"", sourceName.Data(), source);
2542         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2543
2544         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2545
2546         // Query execution
2547         TSQLResult* aResult;
2548         aResult = fServer[system]->Query(sqlQuery);
2549         if (!aResult) {
2550                 Log(detector, Form("GetFileIDs - Can't execute SQL query to %s database for source: %s",
2551                                 GetSystemName(system), source));
2552                 return 0;
2553         }
2554
2555         TList *list = new TList();
2556         list->SetOwner(1);
2557         
2558         if (aResult->GetRowCount() == 0)
2559         {
2560                 Log(detector,
2561                         Form("GetFileIDs - No entry in %s FXS table for source: %s", GetSystemName(system), source));
2562                 delete aResult;
2563                 return list;
2564         }
2565
2566         Log(detector, Form("GetFileIDs - Found %d ids", aResult->GetRowCount()));
2567
2568         TSQLRow* aRow;
2569
2570         while ((aRow = aResult->Next()))
2571         {
2572
2573                 TString id(aRow->GetField(0), aRow->GetFieldLength(0));
2574                 AliDebug(2, Form("fileId = %s", id.Data()));
2575                 list->Add(new TObjString(id));
2576                 delete aRow;
2577         }
2578
2579         delete aResult;
2580
2581         return list;
2582 }
2583
2584 //______________________________________________________________________________________________
2585 Bool_t AliShuttle::Connect(Int_t system)
2586 {
2587         // Connect to MySQL Server of the system's FXS MySQL databases
2588         // DAQ Logbook, Shuttle Logbook and DAQ FXS db are on the same host
2589         //
2590
2591         // check connection: if already connected return
2592         if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
2593
2594         TString dbHost, dbUser, dbPass, dbName;
2595
2596         if (system < 3) // FXS db servers
2597         {
2598                 dbHost = Form("mysql://%s:%d", fConfig->GetFXSdbHost(system), fConfig->GetFXSdbPort(system));
2599                 dbUser = fConfig->GetFXSdbUser(system);
2600                 dbPass = fConfig->GetFXSdbPass(system);
2601                 dbName =   fConfig->GetFXSdbName(system);
2602         } else { // Run & Shuttle logbook servers
2603         // TODO Will the Shuttle logbook server be the same as the Run logbook server ???
2604                 dbHost = Form("mysql://%s:%d", fConfig->GetDAQlbHost(), fConfig->GetDAQlbPort());
2605                 dbUser = fConfig->GetDAQlbUser();
2606                 dbPass = fConfig->GetDAQlbPass();
2607                 dbName =   fConfig->GetDAQlbDB();
2608         }
2609
2610         fServer[system] = TSQLServer::Connect(dbHost.Data(), dbUser.Data(), dbPass.Data());
2611         if (!fServer[system] || !fServer[system]->IsConnected()) {
2612                 if(system < 3)
2613                 {
2614                 AliError(Form("Can't establish connection to FXS database for %s",
2615                                         AliShuttleInterface::GetSystemName(system)));
2616                 } else {
2617                 AliError("Can't establish connection to Run logbook.");
2618                 }
2619                 if(fServer[system]) delete fServer[system];
2620                 return kFALSE;
2621         }
2622
2623         // Get tables
2624         TSQLResult* aResult=0;
2625         switch(system){
2626                 case kDAQ:
2627                         aResult = fServer[kDAQ]->GetTables(dbName.Data());
2628                         break;
2629                 case kDCS:
2630                         aResult = fServer[kDCS]->GetTables(dbName.Data());
2631                         break;
2632                 case kHLT:
2633                         aResult = fServer[kHLT]->GetTables(dbName.Data());
2634                         break;
2635                 default:
2636                         aResult = fServer[3]->GetTables(dbName.Data());
2637                         break;
2638         }
2639
2640         delete aResult;
2641         return kTRUE;
2642 }
2643
2644 //______________________________________________________________________________________________
2645 Bool_t AliShuttle::UpdateTable()
2646 {
2647         //
2648         // Update FXS table filling time_processed field in all rows corresponding to current run and detector
2649         //
2650
2651         Bool_t result = kTRUE;
2652
2653         for (UInt_t system=0; system<3; system++)
2654         {
2655                 if(!fFXSCalled[system]) continue;
2656
2657                 // check connection, in case connect
2658                 if (!Connect(system))
2659                 {
2660                         Log(fCurrentDetector, Form("UpdateTable - Couldn't connect to %s FXS database", GetSystemName(system)));
2661                         result = kFALSE;
2662                         continue;
2663                 }
2664
2665                 TTimeStamp now; // now
2666
2667                 // Loop on FXS list entries
2668                 TIter iter(&fFXSlist[system]);
2669                 TObjString *aFXSentry=0;
2670                 while ((aFXSentry = dynamic_cast<TObjString*> (iter.Next())))
2671                 {
2672                         TString aFXSentrystr = aFXSentry->String();
2673                         TObjArray *aFXSarray = aFXSentrystr.Tokenize("#!?!#");
2674                         if (!aFXSarray || aFXSarray->GetEntries() != 2 )
2675                         {
2676                                 Log(fCurrentDetector, Form("UpdateTable - error updating %s FXS entry. Check string: <%s>",
2677                                         GetSystemName(system), aFXSentrystr.Data()));
2678                                 if(aFXSarray) delete aFXSarray;
2679                                 result = kFALSE;
2680                                 continue;
2681                         }
2682                         const char* fileId = ((TObjString*) aFXSarray->At(0))->GetName();
2683                         const char* source = ((TObjString*) aFXSarray->At(1))->GetName();
2684
2685                         TString whereClause;
2686                         if (system == kDAQ)
2687                         {
2688                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
2689                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
2690                         }
2691                         else if (system == kDCS)
2692                         {
2693                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\";",
2694                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId);
2695                         }
2696                         else if (system == kHLT)
2697                         {
2698                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\";",
2699                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
2700                         }
2701
2702                         delete aFXSarray;
2703
2704                         TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
2705                                                                 now.GetSec(), whereClause.Data());
2706
2707                         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2708
2709                         // Query execution
2710                         TSQLResult* aResult;
2711                         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2712                         if (!aResult)
2713                         {
2714                                 Log(fCurrentDetector, Form("UpdateTable - %s db: can't execute SQL query <%s>",
2715                                                                 GetSystemName(system), sqlQuery.Data()));
2716                                 result = kFALSE;
2717                                 continue;
2718                         }
2719                         delete aResult;
2720                 }
2721         }
2722
2723         return result;
2724 }
2725
2726 //______________________________________________________________________________________________
2727 Bool_t AliShuttle::UpdateTableFailCase()
2728 {
2729         // Update FXS table filling time_processed field in all rows corresponding to current run and detector
2730         // this is called in case the preprocessor is declared failed for the current run, because
2731         // the fields are updated only in case of success
2732
2733         Bool_t result = kTRUE;
2734
2735         for (UInt_t system=0; system<3; system++)
2736         {
2737                 // check connection, in case connect
2738                 if (!Connect(system))
2739                 {
2740                         Log(fCurrentDetector, Form("UpdateTableFailCase - Couldn't connect to %s FXS database",
2741                                                         GetSystemName(system)));
2742                         result = kFALSE;
2743                         continue;
2744                 }
2745
2746                 TTimeStamp now; // now
2747
2748                 // Loop on FXS list entries
2749
2750                 TString whereClause = Form("where run=%d and detector=\"%s\";",
2751                                                 GetCurrentRun(), fCurrentDetector.Data());
2752
2753
2754                 TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
2755                                                         now.GetSec(), whereClause.Data());
2756
2757                 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2758
2759                 // Query execution
2760                 TSQLResult* aResult;
2761                 aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2762                 if (!aResult)
2763                 {
2764                         Log(fCurrentDetector, Form("UpdateTableFailCase - %s db: can't execute SQL query <%s>",
2765                                                         GetSystemName(system), sqlQuery.Data()));
2766                         result = kFALSE;
2767                         continue;
2768                 }
2769                 delete aResult;
2770         }
2771
2772         return result;
2773 }
2774
2775 //______________________________________________________________________________________________
2776 Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
2777 {
2778         //
2779         // Update Shuttle logbook filling detector or shuttle_done column
2780         // ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
2781         //
2782
2783         // check connection, in case connect
2784         if(!Connect(3)){
2785                 Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook.");
2786                 return kFALSE;
2787         }
2788
2789         TString detName(detector);
2790         TString setClause;
2791         if (detName == "shuttle_done" || detName == "shuttle_ignored")
2792         {
2793                 setClause = "set shuttle_done=1";
2794
2795                 if (detName == "shuttle_done")
2796                 {
2797                         // Send the information to ML
2798                         TMonaLisaText  mlStatus("SHUTTLE_status", "Done");
2799
2800                         TList mlList;
2801                         mlList.Add(&mlStatus);
2802                 
2803                         TString mlID;
2804                         mlID.Form("%d", GetCurrentRun());
2805                         fMonaLisa->SendParameters(&mlList, mlID);
2806                 }
2807         } else {
2808                 TString statusStr(status);
2809                 if(statusStr.Contains("done", TString::kIgnoreCase) ||
2810                    statusStr.Contains("failed", TString::kIgnoreCase)){
2811                         setClause = Form("set %s=\"%s\"", detector, status);
2812                 } else {
2813                         Log("SHUTTLE",
2814                                 Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s",
2815                                         status, detector));
2816                         return kFALSE;
2817                 }
2818         }
2819
2820         TString whereClause = Form("where run=%d", GetCurrentRun());
2821
2822         TString sqlQuery = Form("update %s %s %s",
2823                                         fConfig->GetShuttlelbTable(), setClause.Data(), whereClause.Data());
2824
2825         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2826
2827         // Query execution
2828         TSQLResult* aResult;
2829         aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
2830         if (!aResult) {
2831                 Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data()));
2832                 return kFALSE;
2833         }
2834         delete aResult;
2835
2836         return kTRUE;
2837 }
2838
2839 //______________________________________________________________________________________________
2840 Int_t AliShuttle::GetCurrentRun() const
2841 {
2842         //
2843         // Get current run from logbook entry
2844         //
2845
2846         return fLogbookEntry ? fLogbookEntry->GetRun() : -1;
2847 }
2848
2849 //______________________________________________________________________________________________
2850 UInt_t AliShuttle::GetCurrentStartTime() const
2851 {
2852         //
2853         // get current start time
2854         //
2855
2856         return fLogbookEntry ? fLogbookEntry->GetStartTime() : 0;
2857 }
2858
2859 //______________________________________________________________________________________________
2860 UInt_t AliShuttle::GetCurrentEndTime() const
2861 {
2862         //
2863         // get current end time from logbook entry
2864         //
2865
2866         return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
2867 }
2868
2869 //______________________________________________________________________________________________
2870 UInt_t AliShuttle::GetCurrentYear() const
2871 {
2872         //
2873         // Get current year from logbook entry
2874         //
2875
2876         if (!fLogbookEntry) return 0;
2877         
2878         TTimeStamp startTime(GetCurrentStartTime());
2879         TString year =  Form("%d",startTime.GetDate());
2880         year = year(0,4);
2881         
2882         return year.Atoi();
2883 }
2884
2885 //______________________________________________________________________________________________
2886 const char* AliShuttle::GetLHCPeriod() const
2887 {
2888         //
2889         // Get current LHC period from logbook entry
2890         //
2891
2892         if (!fLogbookEntry) return 0;
2893                 
2894         return fLogbookEntry->GetRunParameter("LHCperiod");
2895 }
2896
2897 //______________________________________________________________________________________________
2898 void AliShuttle::Log(const char* detector, const char* message)
2899 {
2900         //
2901         // Fill log string with a message
2902         //
2903
2904         TString logRunDir = GetShuttleLogDir();
2905         if (GetCurrentRun() >=0)
2906                 logRunDir += Form("/%d", GetCurrentRun());
2907         
2908         void* dir = gSystem->OpenDirectory(logRunDir.Data());
2909         if (dir == NULL) {
2910                 if (gSystem->mkdir(logRunDir.Data(), kTRUE)) {
2911                         AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
2912                         return;
2913                 }
2914
2915         } else {
2916                 gSystem->FreeDirectory(dir);
2917         }
2918
2919         TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
2920         if (GetCurrentRun() >= 0) 
2921                 toLog += Form("run %d - ", GetCurrentRun());
2922         toLog += Form("%s", message);
2923
2924         AliInfo(toLog.Data());
2925         
2926         // if we redirect the log output already to the file, leave here
2927         if (fOutputRedirected && strcmp(detector, "SHUTTLE") != 0)
2928                 return;
2929
2930         TString fileName = GetLogFileName(detector);
2931         
2932         gSystem->ExpandPathName(fileName);
2933
2934         ofstream logFile;
2935         logFile.open(fileName, ofstream::out | ofstream::app);
2936
2937         if (!logFile.is_open()) {
2938                 AliError(Form("Could not open file %s", fileName.Data()));
2939                 return;
2940         }
2941
2942         logFile << toLog.Data() << "\n";
2943
2944         logFile.close();
2945 }
2946
2947 //______________________________________________________________________________________________
2948 TString AliShuttle::GetLogFileName(const char* detector) const
2949 {
2950         // 
2951         // returns the name of the log file for a given sub detector
2952         //
2953         
2954         TString fileName;
2955         
2956         if (GetCurrentRun() >= 0) 
2957         {
2958                 fileName.Form("%s/%d/%s_%d.log", GetShuttleLogDir(), GetCurrentRun(), 
2959                         detector, GetCurrentRun());
2960         } else {
2961                 fileName.Form("%s/%s.log", GetShuttleLogDir(), detector);
2962         }
2963
2964         return fileName;
2965 }
2966
2967 //______________________________________________________________________________________________
2968 void AliShuttle::SendAlive()
2969 {
2970         // sends alive message to ML
2971         
2972         TMonaLisaText mlStatus("SHUTTLE_status", "Alive");
2973
2974         TList mlList;
2975         mlList.Add(&mlStatus);
2976
2977         fMonaLisa->SendParameters(&mlList, "__PROCESSINGINFO__");
2978 }
2979
2980 //______________________________________________________________________________________________
2981 Bool_t AliShuttle::Collect(Int_t run)
2982 {
2983         //
2984         // Collects conditions data for all UNPROCESSED run written to DAQ LogBook in case of run = -1 (default)
2985         // If a dedicated run is given this run is processed
2986         //
2987         // In operational mode, this is the Shuttle function triggered by the EOR signal.
2988         //
2989
2990         if (run == -1)
2991                 Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs");
2992         else
2993                 Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run));
2994
2995         SetLastAction("Starting");
2996
2997         // create ML instance
2998         if (!fMonaLisa)
2999                 fMonaLisa = new TMonaLisaWriter(fConfig->GetMonitorHost(), fConfig->GetMonitorTable());
3000                 
3001
3002         SendAlive();
3003
3004         TString whereClause("where shuttle_done=0");
3005         if (run != -1)
3006                 whereClause += Form(" and run=%d", run);
3007
3008         TObjArray shuttleLogbookEntries;
3009         if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries))
3010         {
3011                 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
3012                 return kFALSE;
3013         }
3014
3015         if (shuttleLogbookEntries.GetEntries() == 0)
3016         {
3017                 if (run == -1)
3018                         Log("SHUTTLE","Collect - Found no UNPROCESSED runs in Shuttle logbook");
3019                 else
3020                         Log("SHUTTLE", Form("Collect - Run %d is already DONE "
3021                                                 "or it does not exist in Shuttle logbook", run));
3022                 return kTRUE;
3023         }
3024
3025         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
3026                 fFirstUnprocessed[iDet] = kTRUE;
3027
3028         if (run != -1)
3029         {
3030                 // query Shuttle logbook for earlier runs, check if some detectors are unprocessed,
3031                 // flag them into fFirstUnprocessed array
3032                 TString whereClause(Form("where shuttle_done=0 and run < %d", run));
3033                 TObjArray tmpLogbookEntries;
3034                 if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries))
3035                 {
3036                         Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
3037                         return kFALSE;
3038                 }
3039
3040                 TIter iter(&tmpLogbookEntries);
3041                 AliShuttleLogbookEntry* anEntry = 0;
3042                 while ((anEntry = dynamic_cast<AliShuttleLogbookEntry*> (iter.Next())))
3043                 {
3044                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
3045                         {
3046                                 if (anEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
3047                                 {
3048                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
3049                                                         anEntry->GetRun(), GetDetName(iDet)));
3050                                         fFirstUnprocessed[iDet] = kFALSE;
3051                                 }
3052                         }
3053
3054                 }
3055
3056         }
3057
3058         if (!RetrieveConditionsData(shuttleLogbookEntries))
3059         {
3060                 Log("SHUTTLE", "Collect - Process of at least one run failed");
3061                 return kFALSE;
3062         }
3063
3064         Log("SHUTTLE", "Collect - Requested run(s) successfully processed");
3065         return kTRUE;
3066 }
3067
3068 //______________________________________________________________________________________________
3069 Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries)
3070 {
3071         //
3072         // Retrieve conditions data for all runs that aren't processed yet
3073         //
3074
3075         Bool_t hasError = kFALSE;
3076
3077         TIter iter(&dateEntries);
3078         AliShuttleLogbookEntry* anEntry;
3079
3080         while ((anEntry = (AliShuttleLogbookEntry*) iter.Next())){
3081                 if (!Process(anEntry)){
3082                         hasError = kTRUE;
3083                 }
3084
3085                 // clean SHUTTLE temp directory
3086                 //TString filename = Form("%s/*.shuttle", GetShuttleTempDir());