]> git.uio.no Git - u/mrichter/AliRoot.git/blob - SHUTTLE/AliShuttle.cxx
7236beca2a26a595cd82458e87bebc86adc8ad5b
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttle.cxx
1 /**************************************************************************
2  * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
3  *                                                                        *
4  * Author: The ALICE Off-line Project.                                    *
5  * Contributors are mentioned in the code where appropriate.              *
6  *                                                                        *
7  * Permission to use, copy, modify and distribute this software and its   *
8  * documentation strictly for non-commercial purposes is hereby granted   *
9  * without fee, provided that the above copyright notice appears in all   *
10  * copies and that both the copyright notice and this permission notice   *
11  * appear in the supporting documentation. The authors make no claims     *
12  * about the suitability of this software for any purpose. It is          *
13  * provided "as is" without express or implied warranty.                  *
14  **************************************************************************/
15
16 /*
17 $Log$
18 Revision 1.82  2007/12/20 16:29:43  jgrosseo
19 sending number of open runs also at the end of processing
20
21 Revision 1.81  2007/12/20 14:24:59  jgrosseo
22 Do not increase count in case of StoreError
23
24 Revision 1.80  2007/12/20 13:31:28  acolla
25 Bug fix (Jan Fiete): recovering from StoreError, if the store to OCDB is successful,
26 the Shuttle sets current detector's status=done
27
28 Revision 1.79  2007/12/19 14:03:01  acolla
29
30 detector name to build the lhcPeriod_DET is to be looked in "detector" column, not "partition"
31
32 Revision 1.78  2007/12/19 11:50:41  acolla
33
34 Raw data tag merged files is written in /alice/data/.../lhcPeriod_DET/runNb/raw if partition is made of DET only
35
36 Revision 1.77  2007/12/19 11:16:16  acolla
37 More meaningful log message added in GetFileSources
38
39 Revision 1.76  2007/12/19 07:45:20  acolla
40 bug fix in the name of the raw tag files (Raw instead of raw)
41
42 Revision 1.75  2007/12/18 15:42:14  jgrosseo
43 adding number of open runs to monitoring
44
45 Revision 1.74  2007/12/17 03:23:32  jgrosseo
46 several bugfixes
47 added "empty preprocessor" as placeholder for Acorde in FDR
48
49 Revision 1.73  2007/12/14 19:31:36  acolla
50 Sending email to DCS experts is temporarily commented
51
52 Revision 1.72  2007/12/13 15:44:28  acolla
53 Run type added in mail sent to detector expert (eases understanding)
54
55 Revision 1.71  2007/12/12 14:56:14  jgrosseo
56 sending shuttle_ignore to ML also in case of 0 events
57
58 Revision 1.70  2007/12/12 13:45:35  acolla
59 Monalisa started in Collect() function. Alive message to monitor is sent at each Collect and every minute during preprocessor processing.
60
61 Revision 1.69  2007/12/12 10:06:29  acolla
62 in AliShuttle.cxx: SHUTTLE logbook is updated in case of invalid run times:
63
64 time_start==0 && time_end==0
65
66 logbook is NOT updated if time_start != 0 && time_end == 0, because it may mean that the run is still ongoing.
67
68 Revision 1.68  2007/12/11 10:15:17  acolla
69 Added marking SHUTTLE=DONE for invalid runs
70 (invalid start time or end time) and runs with totalEvents < 1
71
72 Revision 1.67  2007/12/07 19:14:36  acolla
73 in AliShuttleTrigger:
74
75 Added automatic collection of new runs on a regular time basis (settable from the configuration)
76
77 in AliShuttleConfig: new members
78
79 - triggerWait: time to wait for DIM trigger (s) before starting automatic collection of new runs
80 - mode: run mode (test, prod) -> used to build log folder (logs or logs_PROD)
81
82 in AliShuttle:
83
84 - logs now stored in logs/#RUN/DET_#RUN.log
85
86 Revision 1.66  2007/12/05 10:45:19  jgrosseo
87 changed order of arguments to TMonaLisaWriter
88
89 Revision 1.65  2007/11/26 16:58:37  acolla
90 Monalisa configuration added: host and table name
91
92 Revision 1.64  2007/11/13 16:15:47  acolla
93 DCS map is stored in a file in the temp folder where the detector is processed.
94 If the preprocessor fails, the temp folder is not removed. This will help the debugging of the problem.
95
96 Revision 1.63  2007/11/02 10:53:16  acolla
97 Protection added to AliShuttle::CopyFileLocally
98
99 Revision 1.62  2007/10/31 18:23:13  acolla
100 Furter developement on the Shuttle:
101
102 - Shuttle now connects to the Grid as alidaq. The OCDB and Reference folders
103 are now built from /alice/data, e.g.:
104 /alice/data/2007/LHC07a/OCDB
105
106 the year and LHC period are taken from the Shuttle.
107 Raw metadata files are stored by GRP to:
108 /alice/data/2007/LHC07a/<runNb>/Raw/RunMetadata.root
109
110 - Shuttle sends a mail to DCS experts each time DP retrieval fails.
111
112 Revision 1.61  2007/10/30 20:33:51  acolla
113 Improved managing of temporary folders, which weren't correctly handled.
114 Resolved bug introduced in StoreReferenceFile, which caused SPD preprocessor fail.
115
116 Revision 1.60  2007/10/29 18:06:16  acolla
117
118 New function StoreRunMetadataFile added to preprocessor and Shuttle interface
119 This function can be used by GRP only. It stores raw data tags merged file to the
120 raw data folder (e.g. /alice/data/2008/LHC08a/000099999/Raw).
121
122 KNOWN ISSUES:
123
124 1. Shuttle cannot write to /alice/data/ because it belongs to alidaq. Tag file is stored in /alice/simulation/... for the time being.
125 2. Due to a bug in TAlien::Mkdir, the creation of a folder in recursive mode (-p option) does not work. The problem
126 has been corrected in the root package on the Shuttle machine.
127
128 Revision 1.59  2007/10/05 12:40:55  acolla
129
130 Result error code added to AliDCSClient data members (it was "lost" with the new implementation of TMap* GetAliasValues and GetDPValues).
131
132 Revision 1.58  2007/09/28 15:27:40  acolla
133
134 AliDCSClient "multiSplit" option added in the DCS configuration
135 in AliDCSMessage: variable MAX_BODY_SIZE set to 500000
136
137 Revision 1.57  2007/09/27 16:53:13  acolla
138 Detectors can have more than one AMANDA server. SHUTTLE queries the servers sequentially,
139 merges the dcs aliases/DPs in one TMap and sends it to the preprocessor.
140
141 Revision 1.56  2007/09/14 16:46:14  jgrosseo
142 1) Connect and Close are called before and after each query, so one can
143 keep the same AliDCSClient object.
144 2) The splitting of a query is moved to GetDPValues/GetAliasValues.
145 3) Splitting interval can be specified in constructor
146
147 Revision 1.55  2007/08/06 12:26:40  acolla
148 Function Bool_t GetHLTStatus added to preprocessor. It returns the status of HLT
149 read from the run logbook.
150
151 Revision 1.54  2007/07/12 09:51:25  jgrosseo
152 removed duplicated log message in GetFile
153
154 Revision 1.53  2007/07/12 09:26:28  jgrosseo
155 updating hlt fxs base path
156
157 Revision 1.52  2007/07/12 08:06:45  jgrosseo
158 adding log messages in getfile... functions
159 adding not implemented copy constructor in alishuttleconfigholder
160
161 Revision 1.51  2007/07/03 17:24:52  acolla
162 root moved to v5-16-00. TFileMerger->Cp moved to TFile::Cp.
163
164 Revision 1.50  2007/07/02 17:19:32  acolla
165 preprocessor is run in a temp directory that is removed when process is finished.
166
167 Revision 1.49  2007/06/29 10:45:06  acolla
168 Number of columns in MySql Shuttle logbook increased by one (HLT added)
169
170 Revision 1.48  2007/06/21 13:06:19  acolla
171 GetFileSources returns dummy list with 1 source if system=DCS (better than
172 returning error as it was)
173
174 Revision 1.47  2007/06/19 17:28:56  acolla
175 HLT updated; missing map bug removed.
176
177 Revision 1.46  2007/06/09 13:01:09  jgrosseo
178 Switching to retrieval of several DCS DPs at a time (multiDPrequest)
179
180 Revision 1.45  2007/05/30 06:35:20  jgrosseo
181 Adding functionality to the Shuttle/TestShuttle:
182 o) Function to retrieve list of sources from a given system (GetFileSources with id=0)
183 o) Function to retrieve list of IDs for a given source      (GetFileIDs)
184 These functions are needed for dealing with the tag files that are saved for the GRP preprocessor
185 Example code has been added to the TestProcessor in TestShuttle
186
187 Revision 1.44  2007/05/11 16:09:32  acolla
188 Reference files for ITS, MUON and PHOS are now stored in OfflineDetName/OnlineDetName/run_...
189 example: ITS/SPD/100_filename.root
190
191 Revision 1.43  2007/05/10 09:59:51  acolla
192 Various bug fixes in StoreRefFilesToGrid; Cleaning of reference storage before processing detector (CleanReferenceStorage)
193
194 Revision 1.42  2007/05/03 08:01:39  jgrosseo
195 typo in last commit :-(
196
197 Revision 1.41  2007/05/03 08:00:48  jgrosseo
198 fixing log message when pp want to skip dcs value retrieval
199
200 Revision 1.40  2007/04/27 07:06:48  jgrosseo
201 GetFileSources returns empty list in case of no files, but successful query
202 No mails sent in testmode
203
204 Revision 1.39  2007/04/17 12:43:57  acolla
205 Correction in StoreOCDB; change of text in mail to detector expert
206
207 Revision 1.38  2007/04/12 08:26:18  jgrosseo
208 updated comment
209
210 Revision 1.37  2007/04/10 16:53:14  jgrosseo
211 redirecting sub detector stdout, stderr to sub detector log file
212
213 Revision 1.35  2007/04/04 16:26:38  acolla
214 1. Re-organization of function calls in TestPreprocessor to make it more meaningful.
215 2. Added missing dependency in test preprocessors.
216 3. in AliShuttle.cxx: processing time and memory consumption info on a single line.
217
218 Revision 1.34  2007/04/04 10:33:36  jgrosseo
219 1) Storing of files to the Grid is now done _after_ your preprocessors succeeded. This is transparent, which means that you can still use the same functions (Store, StoreReferenceData) to store files to the Grid. However, the Shuttle first stores them locally and transfers them after the preprocessor finished. The return code of these two functions has changed from UInt_t to Bool_t which gives you the success of the storing.
220 In case of an error with the Grid, the Shuttle will retry the storing later, the preprocessor does not need to be run again.
221
222 2) The meaning of the return code of the preprocessor has changed. 0 is now success and any other value means failure. This value is stored in the log and you can use it to keep details about the error condition.
223
224 3) New function StoreReferenceFile to _directly_ store a file (without opening it) to the reference storage.
225
226 4) The memory usage of the preprocessor is monitored. If it exceeds 2 GB it is terminated.
227
228 5) New function AliPreprocessor::ProcessDCS(). If you do not need to have DCS data in all cases, you can skip the processing by implemting this function and returning kFALSE under certain conditions. E.g. if there is a certain run type.
229 If you always need DCS data (like before), you do not need to implement it.
230
231 6) The run type has been added to the monitoring page
232
233 Revision 1.33  2007/04/03 13:56:01  acolla
234 Grid Storage at the end of preprocessing. Added virtual method to disable DCS query according to the
235 run type.
236
237 Revision 1.32  2007/02/28 10:41:56  acolla
238 Run type field added in SHUTTLE framework. Run type is read from "run type" logbook and retrieved by
239 AliPreprocessor::GetRunType() function.
240 Added some ldap definition files.
241
242 Revision 1.30  2007/02/13 11:23:21  acolla
243 Moved getters and setters of Shuttle's main OCDB/Reference, local
244 OCDB/Reference, temp and log folders to AliShuttleInterface
245
246 Revision 1.27  2007/01/30 17:52:42  jgrosseo
247 adding monalisa monitoring
248
249 Revision 1.26  2007/01/23 19:20:03  acolla
250 Removed old ldif files, added TOF, MCH ldif files. Added some options in
251 AliShuttleConfig::Print. Added in Ali Shuttle: SetShuttleTempDir and
252 SetShuttleLogDir
253
254 Revision 1.25  2007/01/15 19:13:52  acolla
255 Moved some AliInfo to AliDebug in SendMail function
256
257 Revision 1.21  2006/12/07 08:51:26  jgrosseo
258 update (alberto):
259 table, db names in ldap configuration
260 added GRP preprocessor
261 DCS data can also be retrieved by data point
262
263 Revision 1.20  2006/11/16 16:16:48  jgrosseo
264 introducing strict run ordering flag
265 removed giving preprocessor name to preprocessor, they have to know their name themselves ;-)
266
267 Revision 1.19  2006/11/06 14:23:04  jgrosseo
268 major update (Alberto)
269 o) reading of run parameters from the logbook
270 o) online offline naming conversion
271 o) standalone DCSclient package
272
273 Revision 1.18  2006/10/20 15:22:59  jgrosseo
274 o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
275 o) Merging Collect, CollectAll, CollectNew function
276 o) Removing implementation of empty copy constructors (declaration still there!)
277
278 Revision 1.17  2006/10/05 16:20:55  jgrosseo
279 adapting to new CDB classes
280
281 Revision 1.16  2006/10/05 15:46:26  jgrosseo
282 applying to the new interface
283
284 Revision 1.15  2006/10/02 16:38:39  jgrosseo
285 update (alberto):
286 fixed memory leaks
287 storing of objects that failed to be stored to the grid before
288 interfacing of shuttle status table in daq system
289
290 Revision 1.14  2006/08/29 09:16:05  jgrosseo
291 small update
292
293 Revision 1.13  2006/08/15 10:50:00  jgrosseo
294 effc++ corrections (alberto)
295
296 Revision 1.12  2006/08/08 14:19:29  jgrosseo
297 Update to shuttle classes (Alberto)
298
299 - Possibility to set the full object's path in the Preprocessor's and
300 Shuttle's  Store functions
301 - Possibility to extend the object's run validity in the same classes
302 ("startValidity" and "validityInfinite" parameters)
303 - Implementation of the StoreReferenceData function to store reference
304 data in a dedicated CDB storage.
305
306 Revision 1.11  2006/07/21 07:37:20  jgrosseo
307 last run is stored after each run
308
309 Revision 1.10  2006/07/20 09:54:40  jgrosseo
310 introducing status management: The processing per subdetector is divided into several steps,
311 after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
312 can keep track of the number of failures and skips further processing after a certain threshold is
313 exceeded. These thresholds can be configured in LDAP.
314
315 Revision 1.9  2006/07/19 10:09:55  jgrosseo
316 new configuration, accesst to DAQ FES (Alberto)
317
318 Revision 1.8  2006/07/11 12:44:36  jgrosseo
319 adding parameters for extended validity range of data produced by preprocessor
320
321 Revision 1.7  2006/07/10 14:37:09  jgrosseo
322 small fix + todo comment
323
324 Revision 1.6  2006/07/10 13:01:41  jgrosseo
325 enhanced storing of last sucessfully processed run (alberto)
326
327 Revision 1.5  2006/07/04 14:59:57  jgrosseo
328 revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
329
330 Revision 1.4  2006/06/12 09:11:16  jgrosseo
331 coding conventions (Alberto)
332
333 Revision 1.3  2006/06/06 14:26:40  jgrosseo
334 o) removed files that were moved to STEER
335 o) shuttle updated to follow the new interface (Alberto)
336
337 Revision 1.2  2006/03/07 07:52:34  hristov
338 New version (B.Yordanov)
339
340 Revision 1.6  2005/11/19 17:19:14  byordano
341 RetrieveDATEEntries and RetrieveConditionsData added
342
343 Revision 1.5  2005/11/19 11:09:27  byordano
344 AliShuttle declaration added
345
346 Revision 1.4  2005/11/17 17:47:34  byordano
347 TList changed to TObjArray
348
349 Revision 1.3  2005/11/17 14:43:23  byordano
350 import to local CVS
351
352 Revision 1.1.1.1  2005/10/28 07:33:58  hristov
353 Initial import as subdirectory in AliRoot
354
355 Revision 1.2  2005/09/13 08:41:15  byordano
356 default startTime endTime added
357
358 Revision 1.4  2005/08/30 09:13:02  byordano
359 some docs added
360
361 Revision 1.3  2005/08/29 21:15:47  byordano
362 some docs added
363
364 */
365
366 //
367 // This class is the main manager for AliShuttle.
368 // It organizes the data retrieval from DCS and calls the
369 // interface methods of AliPreprocessor.
370 // For every detector in the configuration (see AliShuttleConfig),
371 // data for its set of aliases is retrieved. If there is a registered
372 // AliPreprocessor for this detector then it will be used
373 // according to the schema (see AliPreprocessor).
374 // If there isn't a registered AliPreprocessor then the retrieved
375 // data is stored automatically to the underlying AliCDBStorage.
376 // The alias name is used for detSpec.
377 //
378
379 #include "AliShuttle.h"
380
381 #include "AliCDBManager.h"
382 #include "AliCDBStorage.h"
383 #include "AliCDBId.h"
384 #include "AliCDBRunRange.h"
385 #include "AliCDBPath.h"
386 #include "AliCDBEntry.h"
387 #include "AliShuttleConfig.h"
388 #include "DCSClient/AliDCSClient.h"
389 #include "AliLog.h"
390 #include "AliPreprocessor.h"
391 #include "AliShuttleStatus.h"
392 #include "AliShuttleLogbookEntry.h"
393
394 #include <TSystem.h>
395 #include <TObject.h>
396 #include <TString.h>
397 #include <TTimeStamp.h>
398 #include <TObjString.h>
399 #include <TSQLServer.h>
400 #include <TSQLResult.h>
401 #include <TSQLRow.h>
402 #include <TMutex.h>
403 #include <TSystemDirectory.h>
404 #include <TSystemFile.h>
405 #include <TFile.h>
406 #include <TGrid.h>
407 #include <TGridResult.h>
408
409 #include <TMonaLisaWriter.h>
410
411 #include <fstream>
412
413 #include <sys/types.h>
414 #include <sys/wait.h>
415
416 ClassImp(AliShuttle)
417
418 //______________________________________________________________________________________________
419 AliShuttle::AliShuttle(const AliShuttleConfig* config,
420                 UInt_t timeout, Int_t retries):
421 fConfig(config),
422 fTimeout(timeout), fRetries(retries),
423 fPreprocessorMap(),
424 fLogbookEntry(0),
425 fCurrentDetector(),
426 fStatusEntry(0),
427 fMonitoringMutex(0),
428 fLastActionTime(0),
429 fLastAction(),
430 fMonaLisa(0),
431 fTestMode(kNone),
432 fReadTestMode(kFALSE),
433 fOutputRedirected(kFALSE)
434 {
435         //
436         // config: AliShuttleConfig used
437         // timeout: timeout used for AliDCSClient connection
438         // retries: the number of retries in case of connection error.
439         //
440
441         if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
442         for(int iSys=0;iSys<4;iSys++) {
443                 fServer[iSys]=0;
444                 if (iSys < 3)
445                         fFXSlist[iSys].SetOwner(kTRUE);
446         }
447         fPreprocessorMap.SetOwner(kTRUE);
448
449         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
450                 fFirstUnprocessed[iDet] = kFALSE;
451
452         fMonitoringMutex = new TMutex();
453 }
454
455 //______________________________________________________________________________________________
456 AliShuttle::~AliShuttle()
457 {
458         //
459         // destructor
460         //
461
462         fPreprocessorMap.DeleteAll();
463         for(int iSys=0;iSys<4;iSys++)
464                 if(fServer[iSys]) {
465                         fServer[iSys]->Close();
466                         delete fServer[iSys];
467                         fServer[iSys] = 0;
468                 }
469
470         if (fStatusEntry){
471                 delete fStatusEntry;
472                 fStatusEntry = 0;
473         }
474         
475         if (fMonitoringMutex) 
476         {
477                 delete fMonitoringMutex;
478                 fMonitoringMutex = 0;
479         }
480 }
481
482 //______________________________________________________________________________________________
483 void AliShuttle::RegisterPreprocessor(AliPreprocessor* preprocessor)
484 {
485         //
486         // Registers new AliPreprocessor.
487         // It uses GetName() for indentificator of the pre processor.
488         // The pre processor is registered it there isn't any other
489         // with the same identificator (GetName()).
490         //
491
492         const char* detName = preprocessor->GetName();
493         if(GetDetPos(detName) < 0)
494                 AliFatal(Form("********** !!!!! Invalid detector name: %s !!!!! **********", detName));
495
496         if (fPreprocessorMap.GetValue(detName)) {
497                 AliWarning(Form("AliPreprocessor %s is already registered!", detName));
498                 return;
499         }
500
501         fPreprocessorMap.Add(new TObjString(detName), preprocessor);
502 }
503 //______________________________________________________________________________________________
504 Bool_t AliShuttle::Store(const AliCDBPath& path, TObject* object,
505                 AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite)
506 {
507         // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for
508         // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored
509         // using this function. Use StoreReferenceData instead!
510         // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
511         // finishes the data are transferred to the main storage (Grid).
512
513         return StoreLocally(fgkLocalCDB, path, object, metaData, validityStart, validityInfinite);
514 }
515
516 //______________________________________________________________________________________________
517 Bool_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData)
518 {
519         // Stores a CDB object in the storage for reference data. This objects will not be available during
520         // offline reconstrunction. Use this function for reference data only!
521         // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
522         // finishes the data are transferred to the main storage (Grid).
523
524         return StoreLocally(fgkLocalRefStorage, path, object, metaData);
525 }
526
527 //______________________________________________________________________________________________
528 Bool_t AliShuttle::StoreLocally(const TString& localUri,
529                         const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
530                         Int_t validityStart, Bool_t validityInfinite)
531 {
532         // Store object temporarily in local storage. Parameters are passed by Store and StoreReferenceData functions.
533         // when the preprocessor finishes the data are transferred to the main storage (Grid).
534         // The parameters are:
535         //   1) Uri of the backup storage (Local)
536         //   2) the object's path.
537         //   3) the object to be stored
538         //   4) the metaData to be associated with the object
539         //   5) the validity start run number w.r.t. the current run,
540         //      if the data is valid only for this run leave the default 0
541         //   6) specifies if the calibration data is valid for infinity (this means until updated),
542         //      typical for calibration runs, the default is kFALSE
543         //
544         // returns 0 if fail, 1 otherwise
545
546         if (fTestMode & kErrorStorage)
547         {
548                 Log(fCurrentDetector, "StoreLocally - In TESTMODE - Simulating error while storing locally");
549                 return kFALSE;
550         }
551         
552         const char* cdbType = (localUri == fgkLocalCDB) ? "CDB" : "Reference";
553
554         Int_t firstRun = GetCurrentRun() - validityStart;
555         if(firstRun < 0) {
556                 AliWarning("First valid run happens to be less than 0! Setting it to 0.");
557                 firstRun=0;
558         }
559
560         Int_t lastRun = -1;
561         if(validityInfinite) {
562                 lastRun = AliCDBRunRange::Infinity();
563         } else {
564                 lastRun = GetCurrentRun();
565         }
566
567         // Version is set to current run, it will be used later to transfer data to Grid
568         AliCDBId id(path, firstRun, lastRun, GetCurrentRun(), -1);
569
570         if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){
571                 TObjString runUsed = Form("%d", GetCurrentRun());
572                 metaData->SetProperty("RunUsed(TObjString)", runUsed.Clone());
573         }
574
575         Bool_t result = kFALSE;
576
577         if (!(AliCDBManager::Instance()->GetStorage(localUri))) {
578                 Log("SHUTTLE", Form("StoreLocally - Cannot activate local %s storage", cdbType));
579         } else {
580                 result = AliCDBManager::Instance()->GetStorage(localUri)
581                                         ->Put(object, id, metaData);
582         }
583
584         if(!result) {
585
586                 Log(fCurrentDetector, Form("StoreLocally - Can't store object <%s>!", id.ToString().Data()));
587         }
588
589         return result;
590 }
591
592 //______________________________________________________________________________________________
593 Bool_t AliShuttle::StoreOCDB()
594 {
595         //
596         // Called when preprocessor ends successfully or when previous storage attempt failed (kStoreError status)
597         // Calls underlying StoreOCDB(const char*) function twice, for OCDB and Reference storage.
598         // Then calls StoreRefFilesToGrid to store reference files. 
599         //
600         
601         if (fTestMode & kErrorGrid)
602         {
603                 Log("SHUTTLE", "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
604                 Log(fCurrentDetector, "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
605                 return kFALSE;
606         }
607         
608         Log("SHUTTLE","StoreOCDB - Storing OCDB data ...");
609         Bool_t resultCDB = StoreOCDB(fgkMainCDB);
610
611         Log("SHUTTLE","StoreOCDB - Storing reference data ...");
612         Bool_t resultRef = StoreOCDB(fgkMainRefStorage);
613         
614         Log("SHUTTLE","StoreOCDB - Storing reference files ...");
615         Bool_t resultRefFiles = CopyFilesToGrid("reference");
616         
617         Bool_t resultMetadata = kTRUE;
618         if(fCurrentDetector == "GRP") 
619         {
620                 Log("StoreOCDB - SHUTTLE","Storing Run Metadata file ...");
621                 resultMetadata = CopyFilesToGrid("metadata");
622         }
623         
624         return resultCDB && resultRef && resultRefFiles && resultMetadata;
625 }
626
627 //______________________________________________________________________________________________
628 Bool_t AliShuttle::StoreOCDB(const TString& gridURI)
629 {
630         //
631         // Called by StoreOCDB(), performs actual storage to the main OCDB and reference storages (Grid)
632         //
633
634         TObjArray* gridIds=0;
635
636         Bool_t result = kTRUE;
637
638         const char* type = 0;
639         TString localURI;
640         if(gridURI == fgkMainCDB) {
641                 type = "OCDB";
642                 localURI = fgkLocalCDB;
643         } else if(gridURI == fgkMainRefStorage) {
644                 type = "reference";
645                 localURI = fgkLocalRefStorage;
646         } else {
647                 AliError(Form("Invalid storage URI: %s", gridURI.Data()));
648                 return kFALSE;
649         }
650
651         AliCDBManager* man = AliCDBManager::Instance();
652
653         AliCDBStorage *gridSto = man->GetStorage(gridURI);
654         if(!gridSto) {
655                 Log("SHUTTLE",
656                         Form("StoreOCDB - cannot activate main %s storage", type));
657                 return kFALSE;
658         }
659
660         gridIds = gridSto->GetQueryCDBList();
661
662         // get objects previously stored in local CDB
663         AliCDBStorage *localSto = man->GetStorage(localURI);
664         if(!localSto) {
665                 Log("SHUTTLE",
666                         Form("StoreOCDB - cannot activate local %s storage", type));
667                 return kFALSE;
668         }
669         AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*");
670         // Local objects were stored with current run as Grid version!
671         TList* localEntries = localSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun());
672         localEntries->SetOwner(1);
673
674         // loop on local stored objects
675         TIter localIter(localEntries);
676         AliCDBEntry *aLocEntry = 0;
677         while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){
678                 aLocEntry->SetOwner(1);
679                 AliCDBId aLocId = aLocEntry->GetId();
680                 aLocEntry->SetVersion(-1);
681                 aLocEntry->SetSubVersion(-1);
682
683                 // If local object is valid up to infinity we store it only if it is
684                 // the first unprocessed run!
685                 if (aLocId.GetLastRun() == AliCDBRunRange::Infinity() &&
686                         !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
687                 {
688                         Log("SHUTTLE", Form("StoreOCDB - %s: object %s has validity infinite but "
689                                                 "there are previous unprocessed runs!",
690                                                 fCurrentDetector.Data(), aLocId.GetPath().Data()));
691                         result = kFALSE;
692                         continue;
693                 }
694
695                 // loop on Grid valid Id's
696                 Bool_t store = kTRUE;
697                 TIter gridIter(gridIds);
698                 AliCDBId* aGridId = 0;
699                 while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
700                         if(aGridId->GetPath() != aLocId.GetPath()) continue;
701                         // skip all objects valid up to infinity
702                         if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
703                         // if we get here, it means there's already some more recent object stored on Grid!
704                         store = kFALSE;
705                         break;
706                 }
707
708                 // If we get here, the file can be stored!
709                 Bool_t storeOk = gridSto->Put(aLocEntry);
710                 if(!store || storeOk){
711
712                         if (!store)
713                         {
714                                 Log(fCurrentDetector.Data(),
715                                         Form("StoreOCDB - A more recent object already exists in %s storage: <%s>",
716                                                 type, aGridId->ToString().Data()));
717                         } else {
718                                 Log("SHUTTLE",
719                                         Form("StoreOCDB - Object <%s> successfully put into %s storage",
720                                                 aLocId.ToString().Data(), type));
721                                 Log(fCurrentDetector.Data(),
722                                         Form("StoreOCDB - Object <%s> successfully put into %s storage",
723                                                 aLocId.ToString().Data(), type));
724                         }
725
726                         // removing local filename...
727                         TString filename;
728                         localSto->IdToFilename(aLocId, filename);
729                         Log("SHUTTLE", Form("StoreOCDB - Removing local file %s", filename.Data()));
730                         RemoveFile(filename.Data());
731                         continue;
732                 } else  {
733                         Log("SHUTTLE",
734                                 Form("StoreOCDB - Grid %s storage of object <%s> failed",
735                                         type, aLocId.ToString().Data()));
736                         Log(fCurrentDetector.Data(),
737                                 Form("StoreOCDB - Grid %s storage of object <%s> failed",
738                                         type, aLocId.ToString().Data()));
739                         result = kFALSE;
740                 }
741         }
742         localEntries->Clear();
743
744         return result;
745 }
746
747 //______________________________________________________________________________________________
748 Bool_t AliShuttle::CleanReferenceStorage(const char* detector)
749 {
750         // clears the directory used to store reference files of a given subdetector
751   
752         AliCDBManager* man = AliCDBManager::Instance();
753         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
754         TString localBaseFolder = sto->GetBaseFolder();
755
756         TString targetDir = GetRefFilePrefix(localBaseFolder.Data(), detector);
757         
758         Log("SHUTTLE", Form("CleanReferenceStorage - Cleaning %s", targetDir.Data()));
759
760         TString begin;
761         begin.Form("%d_", GetCurrentRun());
762         
763         TSystemDirectory* baseDir = new TSystemDirectory("/", targetDir);
764         if (!baseDir)
765                 return kTRUE;
766                 
767         TList* dirList = baseDir->GetListOfFiles();
768         delete baseDir;
769         
770         if (!dirList) return kTRUE;
771                         
772         if (dirList->GetEntries() < 3) 
773         {
774                 delete dirList;
775                 return kTRUE;
776         }
777                                 
778         Int_t nDirs = 0, nDel = 0;
779         TIter dirIter(dirList);
780         TSystemFile* entry = 0;
781
782         Bool_t success = kTRUE;
783         
784         while ((entry = dynamic_cast<TSystemFile*> (dirIter.Next())))
785         {                                       
786                 if (entry->IsDirectory())
787                         continue;
788                 
789                 TString fileName(entry->GetName());
790                 if (!fileName.BeginsWith(begin))
791                         continue;
792                         
793                 nDirs++;
794                                                 
795                 // delete file
796                 Int_t result = gSystem->Unlink(fileName.Data());
797                 
798                 if (result)
799                 {
800                         Log("SHUTTLE", Form("CleanReferenceStorage - Could not delete file %s!", fileName.Data()));
801                         success = kFALSE;
802                 } else {
803                         nDel++;
804                 }
805         }
806
807         if(nDirs > 0)
808                 Log("SHUTTLE", Form("CleanReferenceStorage - %d (over %d) reference files in folder %s were deleted.", 
809                         nDel, nDirs, targetDir.Data()));
810
811                 
812         delete dirList;
813         return success;
814
815
816
817
818
819
820   Int_t result = gSystem->GetPathInfo(targetDir, 0, (Long64_t*) 0, 0, 0);
821   if (result == 0)
822   {
823     // delete directory
824     result = gSystem->Exec(Form("rm -rf %s", targetDir.Data()));
825     if (result != 0)
826     {  
827       Log("SHUTTLE", Form("CleanReferenceStorage - Could not clean directory %s", targetDir.Data()));
828       return kFALSE;
829     }
830   }
831
832   result = gSystem->mkdir(targetDir, kTRUE);
833   if (result != 0)
834   {
835     Log("SHUTTLE", Form("CleanReferenceStorage - Error creating base directory %s", targetDir.Data()));
836     return kFALSE;
837   }
838         
839   return kTRUE;
840 }
841
842 //______________________________________________________________________________________________
843 Bool_t AliShuttle::StoreReferenceFile(const char* detector, const char* localFile, const char* gridFileName)
844 {
845         //
846         // Stores reference file directly (without opening it). This function stores the file locally.
847         //
848         // The file is stored under the following location: 
849         // <base folder of local reference storage>/<DET>/<RUN#>_<gridFileName>
850         // where <gridFileName> is the second parameter given to the function
851         // 
852         
853         if (fTestMode & kErrorStorage)
854         {
855                 Log(fCurrentDetector, "StoreReferenceFile - In TESTMODE - Simulating error while storing locally");
856                 return kFALSE;
857         }
858         
859         AliCDBManager* man = AliCDBManager::Instance();
860         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
861         
862         TString localBaseFolder = sto->GetBaseFolder();
863         
864         TString target = GetRefFilePrefix(localBaseFolder.Data(), detector);    
865         target.Append(Form("/%d_%s", GetCurrentRun(), gridFileName));
866         
867         return CopyFileLocally(localFile, target);
868 }
869
870 //______________________________________________________________________________________________
871 Bool_t AliShuttle::StoreRunMetadataFile(const char* localFile, const char* gridFileName)
872 {
873         //
874         // Stores Run metadata file to the Grid, in the run folder
875         //
876         // Only GRP can call this function.
877         
878         if (fTestMode & kErrorStorage)
879         {
880                 Log(fCurrentDetector, "StoreRunMetaDataFile - In TESTMODE - Simulating error while storing locally");
881                 return kFALSE;
882         }
883         
884         AliCDBManager* man = AliCDBManager::Instance();
885         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
886         
887         TString localBaseFolder = sto->GetBaseFolder();
888         
889         // Build Run level folder
890         // folder = /alice/data/year/lhcPeriod/runNb/raw
891         
892                 
893         TString lhcPeriod = GetLHCPeriod();     
894         if (lhcPeriod.Length() == 0) 
895         {
896                 Log("SHUTTLE","StoreRunMetaDataFile - LHCPeriod not found in logbook!");
897                 return 0;
898         }
899         
900         // TODO partitions with one detector only write data into LHCperiod_DET
901         TString partition = GetRunParameter("detector");
902         
903         if (partition.Length() > 0 && partition != "ALICE")
904         {
905                 lhcPeriod.Append(Form("_%s", partition.Data()));
906                 Log(fCurrentDetector, Form("Run data tags merged file will be written in %s", 
907                                 lhcPeriod.Data()));
908         }
909                 
910         TString target = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/raw/%s", 
911                                 localBaseFolder.Data(), GetCurrentYear(), 
912                                 lhcPeriod.Data(), GetCurrentRun(), gridFileName);
913                                         
914         return CopyFileLocally(localFile, target);
915 }
916
917 //______________________________________________________________________________________________
918 Bool_t AliShuttle::CopyFileLocally(const char* localFile, const TString& target)
919 {
920         //
921         // Stores file locally. Called by StoreReferenceFile and StoreRunMetadataFile
922         // Files are temporarily stored in the local reference storage. When the preprocessor 
923         // finishes, the Shuttle calls CopyFilesToGrid to transfer the files to AliEn 
924         // (in reference or run level folders)
925         //
926         
927         TString targetDir(target(0, target.Last('/')));
928         
929         //try to open base dir folder, if it does not exist
930         void* dir = gSystem->OpenDirectory(targetDir.Data());
931         if (dir == NULL) {
932                 if (gSystem->mkdir(targetDir.Data(), kTRUE)) {
933                         Log("SHUTTLE", Form("CopyFileLocally - Can't open directory <%s>", targetDir.Data()));
934                         return kFALSE;
935                 }
936
937         } else {
938                 gSystem->FreeDirectory(dir);
939         }
940         
941         Int_t result = 0;
942         
943         result = gSystem->GetPathInfo(localFile, 0, (Long64_t*) 0, 0, 0);
944         if (result)
945         {
946                 Log("SHUTTLE", Form("CopyFileLocally - %s does not exist", localFile));
947                 return kFALSE;
948         }
949
950         result = gSystem->GetPathInfo(target, 0, (Long64_t*) 0, 0, 0);
951         if (!result)
952         {
953                 Log("SHUTTLE", Form("CopyFileLocally - target file %s already exist, removing...", target.Data()));
954                 if (gSystem->Unlink(target.Data()))
955                 {
956                         Log("SHUTTLE", Form("CopyFileLocally - Could not remove existing target file %s!", target.Data()));
957                         return kFALSE;
958                 }
959         }       
960         
961         result = gSystem->CopyFile(localFile, target);
962
963         if (result == 0)
964         {
965                 Log("SHUTTLE", Form("CopyFileLocally - File %s stored locally to %s", localFile, target.Data()));
966                 return kTRUE;
967         }
968         else
969         {
970                 Log("SHUTTLE", Form("CopyFileLocally - Could not store file %s to %s! Error code = %d", 
971                                 localFile, target.Data(), result));
972                 return kFALSE;
973         }       
974
975
976
977 }
978
979 //______________________________________________________________________________________________
980 Bool_t AliShuttle::CopyFilesToGrid(const char* type)
981 {
982         //
983         // Transfers local files to the Grid. Local files can be reference files 
984         // or run metadata file (from GRP only).
985         //
986         // According to the type (ref, metadata) the files are stored under the following location: 
987         // ref --> <base folder of reference storage>/<DET>/<RUN#>_<gridFileName>
988         // metadata --> <run data folder>/<MetadataFileName>
989         //
990                 
991         AliCDBManager* man = AliCDBManager::Instance();
992         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
993         if (!sto)
994                 return kFALSE;
995         TString localBaseFolder = sto->GetBaseFolder();
996         
997         TString dir;
998         TString alienDir;
999         TString begin;
1000         
1001         if (strcmp(type, "reference") == 0) 
1002         {
1003                 dir = GetRefFilePrefix(localBaseFolder.Data(), fCurrentDetector.Data());
1004                 AliCDBStorage* gridSto = man->GetStorage(fgkMainRefStorage);
1005                 if (!gridSto)
1006                         return kFALSE;
1007                 TString gridBaseFolder = gridSto->GetBaseFolder();
1008                 alienDir = GetRefFilePrefix(gridBaseFolder.Data(), fCurrentDetector.Data());
1009                 begin = Form("%d_", GetCurrentRun());
1010         } 
1011         else if (strcmp(type, "metadata") == 0)
1012         {
1013                         
1014                 TString lhcPeriod = GetLHCPeriod();
1015         
1016                 if (lhcPeriod.Length() == 0) 
1017                 {
1018                         Log("SHUTTLE","CopyFilesToGrid - LHCPeriod not found in logbook!");
1019                         return 0;
1020                 }
1021                 
1022                 // TODO partitions with one detector only write data into LHCperiod_DET
1023                 TString partition = GetRunParameter("detector");
1024         
1025                 if (partition.Length() > 0 && partition != "ALICE")
1026                 {
1027                         lhcPeriod.Append(Form("_%s", partition.Data()));
1028                 }
1029                 
1030                 dir = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/raw", 
1031                                 localBaseFolder.Data(), GetCurrentYear(), 
1032                                 lhcPeriod.Data(), GetCurrentRun());
1033                 alienDir = dir(dir.Index("/alice/data/"), dir.Length());
1034                 
1035                 begin = "";
1036         }
1037         else 
1038         {
1039                 Log("SHUTTLE", "CopyFilesToGrid - Unexpected: type label must be reference or metadata!");
1040                 return kFALSE;
1041         }
1042                 
1043         TSystemDirectory* baseDir = new TSystemDirectory("/", dir);
1044         if (!baseDir)
1045                 return kTRUE;
1046                 
1047         TList* dirList = baseDir->GetListOfFiles();
1048         delete baseDir;
1049         
1050         if (!dirList) return kTRUE;
1051                 
1052         if (dirList->GetEntries() < 3) 
1053         {
1054                 delete dirList;
1055                 return kTRUE;
1056         }
1057                         
1058         if (!gGrid)
1059         { 
1060                 Log("SHUTTLE", "CopyFilesToGrid - Connection to Grid failed: Cannot continue!");
1061                 delete dirList;
1062                 return kFALSE;
1063         }
1064         
1065         Int_t nDirs = 0, nTransfer = 0;
1066         TIter dirIter(dirList);
1067         TSystemFile* entry = 0;
1068
1069         Bool_t success = kTRUE;
1070         Bool_t first = kTRUE;
1071         
1072         while ((entry = dynamic_cast<TSystemFile*> (dirIter.Next())))
1073         {                       
1074                 if (entry->IsDirectory())
1075                         continue;
1076                         
1077                 TString fileName(entry->GetName());
1078                 if (!fileName.BeginsWith(begin))
1079                         continue;
1080                         
1081                 nDirs++;
1082                         
1083                 if (first)
1084                 {
1085                         first = kFALSE;
1086                         // check that folder exists, otherwise create it
1087                         TGridResult* result = gGrid->Ls(alienDir.Data(), "a");
1088                         
1089                         if (!result)
1090                         {
1091                                 delete dirList;
1092                                 return kFALSE;
1093                         }
1094                         
1095                         if (!result->GetFileName(1)) // TODO: It looks like element 0 is always 0!!
1096                         {
1097                                 // TODO It does not work currently! Bug in TAliEn::Mkdir
1098                                 // TODO Manually fixed in local root v5-16-00
1099                                 if (!gGrid->Mkdir(alienDir.Data(),"-p",0))
1100                                 {
1101                                         Log("SHUTTLE", Form("CopyFilesToGrid - Cannot create directory %s",
1102                                                         alienDir.Data()));
1103                                         delete dirList;
1104                                         return kFALSE;
1105                                 } else {
1106                                         Log("SHUTTLE",Form("CopyFilesToGrid - Folder %s created", alienDir.Data()));
1107                                 }
1108                                 
1109                         } else {
1110                                         Log("SHUTTLE",Form("CopyFilesToGrid - Folder %s found", alienDir.Data()));
1111                         }
1112                 }
1113                         
1114                 TString fullLocalPath;
1115                 fullLocalPath.Form("%s/%s", dir.Data(), fileName.Data());
1116                 
1117                 TString fullGridPath;
1118                 fullGridPath.Form("alien://%s/%s", alienDir.Data(), fileName.Data());
1119
1120                 Bool_t result = TFile::Cp(fullLocalPath, fullGridPath);
1121                 
1122                 if (result)
1123                 {
1124                         Log("SHUTTLE", Form("CopyFilesToGrid - Copying local file %s to %s succeeded!", 
1125                                                 fullLocalPath.Data(), fullGridPath.Data()));
1126                         RemoveFile(fullLocalPath);
1127                         nTransfer++;
1128                 }
1129                 else
1130                 {
1131                         Log("SHUTTLE", Form("CopyFilesToGrid - Copying local file %s to %s FAILED!", 
1132                                                 fullLocalPath.Data(), fullGridPath.Data()));
1133                         success = kFALSE;
1134                 }
1135         }
1136
1137         Log("SHUTTLE", Form("CopyFilesToGrid - %d (over %d) files in folder %s copied to Grid.", 
1138                                                 nTransfer, nDirs, dir.Data()));
1139
1140                 
1141         delete dirList;
1142         return success;
1143 }
1144
1145 //______________________________________________________________________________________________
1146 const char* AliShuttle::GetRefFilePrefix(const char* base, const char* detector)
1147 {
1148         //
1149         // Get folder name of reference files 
1150         //
1151
1152         TString offDetStr(GetOfflineDetName(detector));
1153         TString dir;
1154         if (offDetStr == "ITS" || offDetStr == "MUON" || offDetStr == "PHOS")
1155         {
1156                 dir.Form("%s/%s/%s", base, offDetStr.Data(), detector);
1157         } else {
1158                 dir.Form("%s/%s", base, offDetStr.Data());
1159         }
1160         
1161         return dir.Data();
1162         
1163
1164 }
1165
1166 //______________________________________________________________________________________________
1167 void AliShuttle::CleanLocalStorage(const TString& uri)
1168 {
1169         //
1170         // Called in case the preprocessor is declared failed. Remove remaining objects from the local storages.
1171         //
1172
1173         const char* type = 0;
1174         if(uri == fgkLocalCDB) {
1175                 type = "OCDB";
1176         } else if(uri == fgkLocalRefStorage) {
1177                 type = "Reference";
1178         } else {
1179                 AliError(Form("Invalid storage URI: %s", uri.Data()));
1180                 return;
1181         }
1182
1183         AliCDBManager* man = AliCDBManager::Instance();
1184
1185         // open local storage
1186         AliCDBStorage *localSto = man->GetStorage(uri);
1187         if(!localSto) {
1188                 Log("SHUTTLE",
1189                         Form("CleanLocalStorage - cannot activate local %s storage", type));
1190                 return;
1191         }
1192
1193         TString filename(Form("%s/%s/*/Run*_v%d_s*.root",
1194                 localSto->GetBaseFolder().Data(), GetOfflineDetName(fCurrentDetector.Data()), GetCurrentRun()));
1195
1196         AliDebug(2, Form("filename = %s", filename.Data()));
1197
1198         Log("SHUTTLE", Form("Removing remaining local files for run %d and detector %s ...",
1199                 GetCurrentRun(), fCurrentDetector.Data()));
1200
1201         RemoveFile(filename.Data());
1202
1203 }
1204
1205 //______________________________________________________________________________________________
1206 void AliShuttle::RemoveFile(const char* filename)
1207 {
1208         //
1209         // removes local file
1210         //
1211
1212         TString command(Form("rm -f %s", filename));
1213
1214         Int_t result = gSystem->Exec(command.Data());
1215         if(result != 0)
1216         {
1217                 Log("SHUTTLE", Form("RemoveFile - %s: Cannot remove file %s!",
1218                         fCurrentDetector.Data(), filename));
1219         }
1220 }
1221
1222 //______________________________________________________________________________________________
1223 AliShuttleStatus* AliShuttle::ReadShuttleStatus()
1224 {
1225         //
1226         // Reads the AliShuttleStatus from the CDB
1227         //
1228
1229         if (fStatusEntry){
1230                 delete fStatusEntry;
1231                 fStatusEntry = 0;
1232         }
1233
1234         fStatusEntry = AliCDBManager::Instance()->GetStorage(GetLocalCDB())
1235                 ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
1236
1237         if (!fStatusEntry) return 0;
1238         fStatusEntry->SetOwner(1);
1239
1240         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1241         if (!status) {
1242                 AliError("Invalid object stored to CDB!");
1243                 return 0;
1244         }
1245
1246         return status;
1247 }
1248
1249 //______________________________________________________________________________________________
1250 Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status)
1251 {
1252         //
1253         // writes the status for one subdetector
1254         //
1255
1256         if (fStatusEntry){
1257                 delete fStatusEntry;
1258                 fStatusEntry = 0;
1259         }
1260
1261         Int_t run = GetCurrentRun();
1262
1263         AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
1264
1265         fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
1266         fStatusEntry->SetOwner(1);
1267
1268         UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
1269
1270         if (!result) {
1271                 Log("SHUTTLE", Form("WriteShuttleStatus - Failed for %s, run %d",
1272                                                 fCurrentDetector.Data(), run));
1273                 return kFALSE;
1274         }
1275         
1276         SendMLInfo();
1277
1278         return kTRUE;
1279 }
1280
1281 //______________________________________________________________________________________________
1282 void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount)
1283 {
1284         //
1285         // changes the AliShuttleStatus for the given detector and run to the given status
1286         //
1287
1288         if (!fStatusEntry){
1289                 AliError("UNEXPECTED: fStatusEntry empty");
1290                 return;
1291         }
1292
1293         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1294
1295         if (!status){
1296                 Log("SHUTTLE", "UpdateShuttleStatus - UNEXPECTED: status could not be read from current CDB entry");
1297                 return;
1298         }
1299
1300         TString actionStr = Form("UpdateShuttleStatus - %s: Changing state from %s to %s",
1301                                 fCurrentDetector.Data(),
1302                                 status->GetStatusName(),
1303                                 status->GetStatusName(newStatus));
1304         Log("SHUTTLE", actionStr);
1305         SetLastAction(actionStr);
1306
1307         status->SetStatus(newStatus);
1308         if (increaseCount) status->IncreaseCount();
1309
1310         AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
1311
1312         SendMLInfo();
1313 }
1314
1315 //______________________________________________________________________________________________
1316 void AliShuttle::SendMLInfo()
1317 {
1318         //
1319         // sends ML information about the current status of the current detector being processed
1320         //
1321         
1322         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1323         
1324         if (!status){
1325                 Log("SHUTTLE", "SendMLInfo - UNEXPECTED: status could not be read from current CDB entry");
1326                 return;
1327         }
1328         
1329         TMonaLisaText  mlStatus(Form("%s_status", fCurrentDetector.Data()), status->GetStatusName());
1330         TMonaLisaValue mlRetryCount(Form("%s_count", fCurrentDetector.Data()), status->GetCount());
1331
1332         TList mlList;
1333         mlList.Add(&mlStatus);
1334         mlList.Add(&mlRetryCount);
1335
1336         TString mlID;
1337         mlID.Form("%d", GetCurrentRun());
1338         fMonaLisa->SendParameters(&mlList, mlID);
1339 }
1340
1341 //______________________________________________________________________________________________
1342 Bool_t AliShuttle::ContinueProcessing()
1343 {
1344         // this function reads the AliShuttleStatus information from CDB and
1345         // checks if the processing should be continued
1346         // if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
1347
1348         if (!fConfig->HostProcessDetector(fCurrentDetector)) return kFALSE;
1349
1350         AliPreprocessor* aPreprocessor =
1351                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
1352         if (!aPreprocessor)
1353         {
1354                 Log("SHUTTLE", Form("ContinueProcessing - %s: no preprocessor registered", fCurrentDetector.Data()));
1355                 return kFALSE;
1356         }
1357
1358         AliShuttleLogbookEntry::Status entryStatus =
1359                 fLogbookEntry->GetDetectorStatus(fCurrentDetector);
1360
1361         if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
1362                 Log("SHUTTLE", Form("ContinueProcessing - %s is %s",
1363                                 fCurrentDetector.Data(),
1364                                 fLogbookEntry->GetDetectorStatusName(entryStatus)));
1365                 return kFALSE;
1366         }
1367
1368         // if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state
1369
1370         // check if current run is first unprocessed run for current detector
1371         if (fConfig->StrictRunOrder(fCurrentDetector) &&
1372                 !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
1373         {
1374                 if (fTestMode == kNone)
1375                 {
1376                         Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering"
1377                                         " but this is not the first unprocessed run!"));
1378                         return kFALSE;
1379                 }
1380                 else
1381                 {
1382                         Log("SHUTTLE", Form("ContinueProcessing - In TESTMODE - "
1383                                         "Although %s requires strict run ordering "
1384                                         "and this is not the first unprocessed run, "
1385                                         "the SHUTTLE continues"));
1386                 }
1387         }
1388
1389         AliShuttleStatus* status = ReadShuttleStatus();
1390         if (!status) {
1391                 // first time
1392                 Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
1393                                 fCurrentDetector.Data()));
1394                 status = new AliShuttleStatus(AliShuttleStatus::kStarted);
1395                 return WriteShuttleStatus(status);
1396         }
1397
1398         // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
1399         // If it happens it may mean Logbook updating failed... let's do it now!
1400         if (status->GetStatus() == AliShuttleStatus::kDone ||
1401             status->GetStatus() == AliShuttleStatus::kFailed){
1402                 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
1403                                         fCurrentDetector.Data(),
1404                                         status->GetStatusName(status->GetStatus())));
1405                 UpdateShuttleLogbook(fCurrentDetector.Data(),
1406                                         status->GetStatusName(status->GetStatus()));
1407                 return kFALSE;
1408         }
1409
1410         if (status->GetStatus() == AliShuttleStatus::kStoreStarted || status->GetStatus() == AliShuttleStatus::kStoreError) {
1411                 Log("SHUTTLE",
1412                         Form("ContinueProcessing - %s: Grid storage of one or more "
1413                                 "objects failed. Trying again now",
1414                                 fCurrentDetector.Data()));
1415                 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
1416                 if (StoreOCDB()){
1417                         Log("SHUTTLE", Form("ContinueProcessing - %s: all objects "
1418                                 "successfully stored into main storage",
1419                                 fCurrentDetector.Data()));
1420                         UpdateShuttleStatus(AliShuttleStatus::kDone);
1421                         UpdateShuttleLogbook(fCurrentDetector, "DONE");
1422                 } else {
1423                         Log("SHUTTLE",
1424                                 Form("ContinueProcessing - %s: Grid storage failed again",
1425                                         fCurrentDetector.Data()));
1426                         UpdateShuttleStatus(AliShuttleStatus::kStoreError);
1427                 }
1428                 return kFALSE;
1429         }
1430
1431         // if we get here, there is a restart
1432         Bool_t cont = kFALSE;
1433
1434         // abort conditions
1435         if (status->GetCount() >= fConfig->GetMaxRetries()) {
1436                 Log("SHUTTLE", Form("ContinueProcessing - %s failed %d times in status %s - "
1437                                 "Updating Shuttle Logbook", fCurrentDetector.Data(),
1438                                 status->GetCount(), status->GetStatusName()));
1439                 UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
1440                 UpdateShuttleStatus(AliShuttleStatus::kFailed);
1441
1442                 // there may still be objects in local OCDB and reference storage
1443                 // and FXS databases may be not updated: do it now!
1444                 
1445                 // TODO Currently disabled, we want to keep files in case of failure!
1446                 // CleanLocalStorage(fgkLocalCDB);
1447                 // CleanLocalStorage(fgkLocalRefStorage);
1448                 // UpdateTableFailCase();
1449                 
1450                 // Send mail to detector expert!
1451                 Log("SHUTTLE", Form("ContinueProcessing - Sending mail to %s expert...", 
1452                                         fCurrentDetector.Data()));
1453                 if (!SendMail())
1454                         Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert",
1455                                         fCurrentDetector.Data()));
1456
1457         } else {
1458                 Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. "
1459                                 "Aborted before with %s. Retry number %d.", fCurrentDetector.Data(),
1460                                 status->GetStatusName(), status->GetCount()));
1461                 Bool_t increaseCount = kTRUE;
1462                 if (status->GetStatus() == AliShuttleStatus::kDCSError || 
1463                         status->GetStatus() == AliShuttleStatus::kDCSStarted)
1464                                 increaseCount = kFALSE;
1465                                 
1466                 UpdateShuttleStatus(AliShuttleStatus::kStarted, increaseCount);
1467                 cont = kTRUE;
1468         }
1469
1470         return cont;
1471 }
1472
1473 //______________________________________________________________________________________________
1474 Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
1475 {
1476         //
1477         // Makes data retrieval for all detectors in the configuration.
1478         // entry: Shuttle logbook entry, contains run paramenters and status of detectors
1479         // (Unprocessed, Inactive, Failed or Done).
1480         // Returns kFALSE in case of error occured and kTRUE otherwise
1481         //
1482
1483         if (!entry) return kFALSE;
1484
1485         fLogbookEntry = entry;
1486
1487         Log("SHUTTLE", Form("\t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^*",
1488                                         GetCurrentRun()));
1489
1490         // Send the information to ML
1491         TMonaLisaText  mlStatus("SHUTTLE_status", "Processing");
1492         TMonaLisaText  mlRunType("SHUTTLE_runtype", Form("%s (%s)", entry->GetRunType(), entry->GetRunParameter("log")));
1493
1494         TList mlList;
1495         mlList.Add(&mlStatus);
1496         mlList.Add(&mlRunType);
1497
1498         TString mlID;
1499         mlID.Form("%d", GetCurrentRun());
1500         fMonaLisa->SendParameters(&mlList, mlID);
1501
1502         if (fLogbookEntry->IsDone())
1503         {
1504                 Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook");
1505                 UpdateShuttleLogbook("shuttle_done");
1506                 fLogbookEntry = 0;
1507                 return kTRUE;
1508         }
1509
1510         // read test mode if flag is set
1511         if (fReadTestMode)
1512         {
1513                 fTestMode = kNone;
1514                 TString logEntry(entry->GetRunParameter("log"));
1515                 //printf("log entry = %s\n", logEntry.Data());
1516                 TString searchStr("Testmode: ");
1517                 Int_t pos = logEntry.Index(searchStr.Data());
1518                 //printf("%d\n", pos);
1519                 if (pos >= 0)
1520                 {
1521                         TSubString subStr = logEntry(pos + searchStr.Length(), logEntry.Length());
1522                         //printf("%s\n", subStr.String().Data());
1523                         TString newStr(subStr.Data());
1524                         TObjArray* token = newStr.Tokenize(' ');
1525                         if (token)
1526                         {
1527                                 //token->Print();
1528                                 TObjString* tmpStr = dynamic_cast<TObjString*> (token->First());
1529                                 if (tmpStr)
1530                                 {
1531                                         Int_t testMode = tmpStr->String().Atoi();
1532                                         if (testMode > 0)
1533                                         {
1534                                                 Log("SHUTTLE", Form("Process - Enabling test mode %d", testMode));
1535                                                 SetTestMode((TestMode) testMode);
1536                                         }
1537                                 }
1538                                 delete token;          
1539                         }
1540                 }
1541         }
1542                 
1543         fLogbookEntry->Print("all");
1544
1545         // Initialization
1546         Bool_t hasError = kFALSE;
1547
1548         // Set the CDB and Reference folders according to the year and LHC period
1549         TString lhcPeriod(GetLHCPeriod());
1550         if (lhcPeriod.Length() == 0) 
1551         {
1552                 Log("SHUTTLE","Process - LHCPeriod not found in logbook!");
1553                 return 0; 
1554         }       
1555         
1556         if (fgkMainCDB.Length() == 0)
1557                 fgkMainCDB = Form("alien://folder=/alice/data/%d/%s/OCDB?user=alidaq?cacheFold=/tmp/OCDBCache", 
1558                                         GetCurrentYear(), lhcPeriod.Data());
1559         
1560         if (fgkMainRefStorage.Length() == 0)
1561                 fgkMainRefStorage = Form("alien://folder=/alice/data/%d/%s/Reference?user=alidaq?cacheFold=/tmp/OCDBCache", 
1562                                         GetCurrentYear(), lhcPeriod.Data());
1563         
1564         // Loop on detectors in the configuration
1565         TIter iter(fConfig->GetDetectors());
1566         TObjString* aDetector = 0;
1567
1568         Bool_t first = kTRUE;
1569
1570         while ((aDetector = (TObjString*) iter.Next()))
1571         {
1572                 fCurrentDetector = aDetector->String();
1573
1574                 if (ContinueProcessing() == kFALSE) continue;
1575                 
1576                 if (first)
1577                 {
1578                   // only read QueryCDB when needed and only once
1579                   AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
1580                   if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
1581                   AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
1582                   if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
1583                   first = kFALSE;
1584                 }
1585
1586                 Log("SHUTTLE", Form("\t\t\t****** run %d - %s: START  ******",
1587                                                 GetCurrentRun(), aDetector->GetName()));
1588
1589                 for(Int_t iSys=0;iSys<3;iSys++) fFXSCalled[iSys]=kFALSE;
1590
1591                 Log(fCurrentDetector.Data(), "Process - Starting processing");
1592
1593                 Int_t pid = fork();
1594
1595                 if (pid < 0)
1596                 {
1597                         Log("SHUTTLE", "Process - ERROR: Forking failed");
1598                 }
1599                 else if (pid > 0)
1600                 {
1601                         // parent
1602                         Log("SHUTTLE", Form("Process - In parent process of %d - %s: Starting monitoring",
1603                                                         GetCurrentRun(), aDetector->GetName()));
1604
1605                         Long_t begin = time(0);
1606
1607                         int status; // to be used with waitpid, on purpose an int (not Int_t)!
1608                         while (waitpid(pid, &status, WNOHANG) == 0)
1609                         {
1610                                 Long_t expiredTime = time(0) - begin;
1611
1612                                 if (expiredTime > fConfig->GetPPTimeOut())
1613                                 {
1614                                         TString tmp;
1615                                         tmp.Form("Process - Process of %s time out. "
1616                                                         "Run time: %d seconds. Killing...",
1617                                                         fCurrentDetector.Data(), expiredTime);
1618                                         Log("SHUTTLE", tmp);
1619                                         Log(fCurrentDetector, tmp);
1620
1621                                         kill(pid, 9);
1622
1623                                         UpdateShuttleStatus(AliShuttleStatus::kPPTimeOut);
1624                                         hasError = kTRUE;
1625
1626                                         gSystem->Sleep(1000);
1627                                 }
1628                                 else
1629                                 {
1630                                         gSystem->Sleep(1000);
1631                                         
1632                                         TString checkStr;
1633                                         checkStr.Form("ps -o vsize --pid %d | tail -n 1", pid);
1634                                         FILE* pipe = gSystem->OpenPipe(checkStr, "r");
1635                                         if (!pipe)
1636                                         {
1637                                                 Log("SHUTTLE", Form("Process - Error: "
1638                                                         "Could not open pipe to %s", checkStr.Data()));
1639                                                 continue;
1640                                         }
1641                                                 
1642                                         char buffer[100];
1643                                         if (!fgets(buffer, 100, pipe))
1644                                         {
1645                                                 Log("SHUTTLE", "Process - Error: ps did not return anything");
1646                                                 gSystem->ClosePipe(pipe);
1647                                                 continue;
1648                                         }
1649                                         gSystem->ClosePipe(pipe);
1650                                         
1651                                         //Log("SHUTTLE", Form("ps returned %s", buffer));
1652                                         
1653                                         Int_t mem = 0;
1654                                         if ((sscanf(buffer, "%d\n", &mem) != 1) || !mem)
1655                                         {
1656                                                 Log("SHUTTLE", "Process - Error: Could not parse output of ps");
1657                                                 continue;
1658                                         }
1659                                         
1660                                         if (expiredTime % 60 == 0)
1661                                         {
1662                                                 Log("SHUTTLE", Form("Process - %s: Checking process. "
1663                                                         "Run time: %d seconds - Memory consumption: %d KB",
1664                                                         fCurrentDetector.Data(), expiredTime, mem));
1665                                                 SendAlive();
1666                                         }
1667                                         
1668                                         if (mem > fConfig->GetPPMaxMem())
1669                                         {
1670                                                 TString tmp;
1671                                                 tmp.Form("Process - Process exceeds maximum allowed memory "
1672                                                         "(%d KB > %d KB). Killing...",
1673                                                         mem, fConfig->GetPPMaxMem());
1674                                                 Log("SHUTTLE", tmp);
1675                                                 Log(fCurrentDetector, tmp);
1676         
1677                                                 kill(pid, 9);
1678         
1679                                                 UpdateShuttleStatus(AliShuttleStatus::kPPOutOfMemory);
1680                                                 hasError = kTRUE;
1681         
1682                                                 gSystem->Sleep(1000);
1683                                         }
1684                                 }
1685                         }
1686
1687                         Log("SHUTTLE", Form("Process - In parent process of %d - %s: Client has terminated.",
1688                                                                 GetCurrentRun(), aDetector->GetName()));
1689
1690                         if (WIFEXITED(status))
1691                         {
1692                                 Int_t returnCode = WEXITSTATUS(status);
1693
1694                                 Log("SHUTTLE", Form("Process - %s: the return code is %d", fCurrentDetector.Data(),
1695                                                                                 returnCode));
1696
1697                                 if (returnCode == 0) hasError = kTRUE;
1698                         }
1699                 }
1700                 else if (pid == 0)
1701                 {
1702                         // client
1703                         Log("SHUTTLE", Form("Process - In client process of %d - %s", GetCurrentRun(),
1704                                 aDetector->GetName()));
1705
1706                         Log("SHUTTLE", Form("Process - Redirecting output to %s log",fCurrentDetector.Data()));
1707
1708                         if ((freopen(GetLogFileName(fCurrentDetector), "a", stdout)) == 0)
1709                         {
1710                                 Log("SHUTTLE", "Process - Could not freopen stdout");
1711                         }
1712                         else
1713                         {
1714                                 fOutputRedirected = kTRUE;
1715                                 if ((dup2(fileno(stdout), fileno(stderr))) < 0)
1716                                         Log("SHUTTLE", "Process - Could not redirect stderr");
1717                                 
1718                         }
1719                         
1720                         TString wd = gSystem->WorkingDirectory();
1721                         TString tmpDir = Form("%s/%s_%d_process", GetShuttleTempDir(), 
1722                                 fCurrentDetector.Data(), GetCurrentRun());
1723                         
1724                         Int_t result = gSystem->GetPathInfo(tmpDir.Data(), 0, (Long64_t*) 0, 0, 0);
1725                         if (!result) // temp dir already exists!
1726                         {
1727                                 Log(fCurrentDetector.Data(), 
1728                                         Form("Process - %s dir already exists! Removing...", tmpDir.Data()));
1729                                 gSystem->Exec(Form("rm -rf %s",tmpDir.Data()));         
1730                         } 
1731                         
1732                         if (gSystem->mkdir(tmpDir.Data(), 1))
1733                         {
1734                                 Log(fCurrentDetector.Data(), "Process - could not make temp directory!!");
1735                                 gSystem->Exit(1);
1736                         }
1737                         
1738                         if (!gSystem->ChangeDirectory(tmpDir.Data())) 
1739                         {
1740                                 Log(fCurrentDetector.Data(), "Process - could not change directory!!");
1741                                 gSystem->Exit(1);                       
1742                         }
1743                         
1744                         Bool_t success = ProcessCurrentDetector();
1745                         
1746                         gSystem->ChangeDirectory(wd.Data());
1747                                                 
1748                         if (success) // Preprocessor finished successfully!
1749                         { 
1750                                 // remove temporary folder or DCS map
1751                                 if (!fConfig->KeepTempFolder())
1752                                 {
1753                                         gSystem->Exec(Form("rm -rf %s",tmpDir.Data()));
1754                                 } else if (!fConfig->KeepDCSMap())
1755                                 {
1756                                         gSystem->Exec(Form("rm -f %s/DCSMap.root",tmpDir.Data()));
1757                                 }
1758                                 
1759                                 // Update time_processed field in FXS DB
1760                                 if (UpdateTable() == kFALSE)
1761                                         Log("SHUTTLE", Form("Process - %s: Could not update FXS databases!", 
1762                                                         fCurrentDetector.Data()));
1763
1764                                 // Transfer the data from local storage to main storage (Grid)
1765                                 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
1766                                 if (StoreOCDB() == kFALSE)
1767                                 {
1768                                         Log("SHUTTLE", 
1769                                                 Form("\t\t\t****** run %d - %s: STORAGE ERROR ******",
1770                                                         GetCurrentRun(), aDetector->GetName()));
1771                                         UpdateShuttleStatus(AliShuttleStatus::kStoreError);
1772                                         success = kFALSE;
1773                                 } else {
1774                                         Log("SHUTTLE", 
1775                                                 Form("\t\t\t****** run %d - %s: DONE ******",
1776                                                         GetCurrentRun(), aDetector->GetName()));
1777                                         UpdateShuttleStatus(AliShuttleStatus::kDone);
1778                                         UpdateShuttleLogbook(fCurrentDetector, "DONE");
1779                                 }
1780                         } else 
1781                         {
1782                                 Log("SHUTTLE", 
1783                                         Form("\t\t\t****** run %d - %s: PP ERROR ******",
1784                                                 GetCurrentRun(), aDetector->GetName()));
1785                         }
1786
1787                         for (UInt_t iSys=0; iSys<3; iSys++)
1788                         {
1789                                 if (fFXSCalled[iSys]) fFXSlist[iSys].Clear();
1790                         }
1791
1792                         Log("SHUTTLE", Form("Process - Client process of %d - %s is exiting now with %d.",
1793                                                         GetCurrentRun(), aDetector->GetName(), success));
1794
1795                         // the client exits here
1796                         gSystem->Exit(success);
1797
1798                         AliError("We should never get here!!!");
1799                 }
1800         }
1801
1802         Log("SHUTTLE", Form("\t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^*",
1803                                                         GetCurrentRun()));
1804
1805         //check if shuttle is done for this run, if so update logbook
1806         TObjArray checkEntryArray;
1807         checkEntryArray.SetOwner(1);
1808         TString whereClause = Form("where run=%d", GetCurrentRun());
1809         if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || 
1810                         checkEntryArray.GetEntries() == 0) {
1811                 Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!",
1812                                                 GetCurrentRun()));
1813                 return hasError == kFALSE;
1814         }
1815
1816         AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*>
1817                                                 (checkEntryArray.At(0));
1818
1819         if (checkEntry)
1820         {
1821                 if (checkEntry->IsDone())
1822                 {
1823                         Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook");
1824                         UpdateShuttleLogbook("shuttle_done");
1825                 }
1826                 else
1827                 {
1828                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1829                         {
1830                                 if (checkEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
1831                                 {
1832                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
1833                                                         checkEntry->GetRun(), GetDetName(iDet)));
1834                                         fFirstUnprocessed[iDet] = kFALSE;
1835                                 }
1836                         }
1837                 }
1838         }
1839
1840         fLogbookEntry = 0;
1841
1842         return hasError == kFALSE;
1843 }
1844
1845 //______________________________________________________________________________________________
1846 Bool_t AliShuttle::ProcessCurrentDetector()
1847 {
1848         //
1849         // Makes data retrieval just for a specific detector (fCurrentDetector).
1850         // Threre should be a configuration for this detector.
1851
1852         Log("SHUTTLE", Form("ProcessCurrentDetector - Retrieving values for %s, run %d", 
1853                                                 fCurrentDetector.Data(), GetCurrentRun()));
1854
1855         TString wd = gSystem->WorkingDirectory();
1856         
1857         if (!CleanReferenceStorage(fCurrentDetector.Data()))
1858                 return kFALSE;
1859         
1860         gSystem->ChangeDirectory(wd.Data());
1861         
1862         TMap* dcsMap = new TMap();
1863
1864         // call preprocessor
1865         AliPreprocessor* aPreprocessor =
1866                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
1867
1868         aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
1869
1870         Bool_t processDCS = aPreprocessor->ProcessDCS();
1871
1872         if (!processDCS)
1873         {
1874                 Log(fCurrentDetector, "ProcessCurrentDetector -"
1875                         " The preprocessor requested to skip the retrieval of DCS values");
1876         }
1877         else if (fTestMode & kSkipDCS)
1878         {
1879                 Log(fCurrentDetector, "ProcessCurrentDetector - In TESTMODE: Skipping DCS processing");
1880         } 
1881         else if (fTestMode & kErrorDCS)
1882         {
1883                 Log(fCurrentDetector, "ProcessCurrentDetector - In TESTMODE: Simulating DCS error");
1884                 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
1885                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1886                 delete dcsMap;
1887                 return kFALSE;
1888         } else {
1889
1890                 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
1891
1892                 // Query DCS archive
1893                 Int_t nServers = fConfig->GetNServers(fCurrentDetector);
1894                 
1895                 for (int iServ=0; iServ<nServers; iServ++)
1896                 {
1897                 
1898                         TString host(fConfig->GetDCSHost(fCurrentDetector, iServ));
1899                         Int_t port = fConfig->GetDCSPort(fCurrentDetector, iServ);
1900                         Int_t multiSplit = fConfig->GetMultiSplit(fCurrentDetector, iServ);
1901
1902                         Log(fCurrentDetector, Form("ProcessCurrentDetector -"
1903                                         " Querying DCS Amanda server %s:%d (%d of %d)", 
1904                                         host.Data(), port, iServ+1, nServers));
1905                         
1906                         TMap* aliasMap = 0;
1907                         TMap* dpMap = 0;
1908         
1909                         if (fConfig->GetDCSAliases(fCurrentDetector, iServ)->GetEntries() > 0)
1910                         {
1911                                 aliasMap = GetValueSet(host, port, 
1912                                                 fConfig->GetDCSAliases(fCurrentDetector, iServ), 
1913                                                 kAlias, multiSplit);
1914                                 if (!aliasMap)
1915                                 {
1916                                         Log(fCurrentDetector, 
1917                                                 Form("ProcessCurrentDetector -"
1918                                                         " Error retrieving DCS aliases from server %s."
1919                                                         " Sending mail to DCS experts!", host.Data()));
1920                                         UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1921                                         
1922                                         if (!SendMailToDCS())
1923                                                 Log("SHUTTLE", Form("ProcessCurrentDetector - "
1924                                                         "Could not send mail to DCS experts!"));
1925
1926                                         delete dcsMap;
1927                                         return kFALSE;
1928                                 }
1929                         }
1930                         
1931                         if (fConfig->GetDCSDataPoints(fCurrentDetector, iServ)->GetEntries() > 0)
1932                         {
1933                                 dpMap = GetValueSet(host, port, 
1934                                                 fConfig->GetDCSDataPoints(fCurrentDetector, iServ), 
1935                                                 kDP, multiSplit);
1936                                 if (!dpMap)
1937                                 {
1938                                         Log(fCurrentDetector, 
1939                                                 Form("ProcessCurrentDetector -"
1940                                                         " Error retrieving DCS data points from server %s."
1941                                                         " Sending mail to DCS experts!", host.Data()));
1942                                         UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1943                                         
1944                                         if (!SendMailToDCS())
1945                                                 Log("SHUTTLE", Form("ProcessCurrentDetector - "
1946                                                         "Could not send mail to DCS experts!"));
1947                                         
1948                                         if (aliasMap) delete aliasMap;
1949                                         delete dcsMap;
1950                                         return kFALSE;
1951                                 }                               
1952                         }
1953                         
1954                         // merge aliasMap and dpMap into dcsMap
1955                         if(aliasMap) {
1956                                 TIter iter(aliasMap);
1957                                 TObjString* key = 0;
1958                                 while ((key = (TObjString*) iter.Next()))
1959                                         dcsMap->Add(key, aliasMap->GetValue(key->String()));
1960                                 
1961                                 aliasMap->SetOwner(kFALSE);
1962                                 delete aliasMap;
1963                         }       
1964                         
1965                         if(dpMap) {
1966                                 TIter iter(dpMap);
1967                                 TObjString* key = 0;
1968                                 while ((key = (TObjString*) iter.Next()))
1969                                         dcsMap->Add(key, dpMap->GetValue(key->String()));
1970                                 
1971                                 dpMap->SetOwner(kFALSE);
1972                                 delete dpMap;
1973                         }
1974                 }
1975         }
1976         
1977         // save map into file, to help debugging in case of preprocessor error
1978         TFile* f = TFile::Open("DCSMap.root","recreate");
1979         f->cd();
1980         dcsMap->Write("DCSMap", TObject::kSingleKey);
1981         f->Close();
1982         delete f;
1983         
1984         // DCS Archive DB processing successful. Call Preprocessor!
1985         UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
1986
1987         UInt_t returnValue = aPreprocessor->Process(dcsMap);
1988
1989         if (returnValue > 0) // Preprocessor error!
1990         {
1991                 Log(fCurrentDetector, Form("ProcessCurrentDetector - "
1992                                 "Preprocessor failed. Process returned %d.", returnValue));
1993                 UpdateShuttleStatus(AliShuttleStatus::kPPError);
1994                 dcsMap->DeleteAll();
1995                 delete dcsMap;
1996                 return kFALSE;
1997         }
1998         
1999         // preprocessor ok!
2000         UpdateShuttleStatus(AliShuttleStatus::kPPDone);
2001         Log(fCurrentDetector, Form("ProcessCurrentDetector - %s preprocessor returned success",
2002                                 fCurrentDetector.Data()));
2003
2004         dcsMap->DeleteAll();
2005         delete dcsMap;
2006
2007         return kTRUE;
2008 }
2009
2010 //______________________________________________________________________________________________
2011 void AliShuttle::CountOpenRuns()
2012 {
2013         // Query DAQ's Shuttle logbook and sends the number of open runs to ML
2014         
2015         // check connection, in case connect
2016         if (!Connect(3)) 
2017                 return;
2018
2019         TString sqlQuery;
2020         sqlQuery = Form("select count(*) from %s where shuttle_done=0", fConfig->GetShuttlelbTable());
2021         
2022         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
2023         if (!aResult) {
2024                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
2025                 return;
2026         }
2027
2028         AliDebug(2,Form("Query = %s", sqlQuery.Data()));
2029         
2030         if (aResult->GetRowCount() == 0) {
2031                 AliError(Form("No result for query %s received", sqlQuery.Data()));
2032                 return;
2033         }
2034
2035         if (aResult->GetFieldCount() != 1) {
2036                 AliError(Form("Invalid field count for query %s received", sqlQuery.Data()));
2037                 return;
2038         }
2039
2040         TSQLRow* aRow = aResult->Next();
2041         if (!aRow) {
2042                 AliError(Form("Could not receive result of query %s", sqlQuery.Data()));
2043                 return;
2044         }
2045         
2046         TString result(aRow->GetField(0), aRow->GetFieldLength(0));
2047         Int_t count = result.Atoi();
2048         
2049         Log("SHUTTLE", Form("%d unprocessed runs", count));
2050         
2051         delete aRow;
2052         delete aResult;
2053
2054         TMonaLisaValue mlStatus("SHUTTLE_openruns", count);
2055
2056         TList mlList;
2057         mlList.Add(&mlStatus);
2058
2059         fMonaLisa->SendParameters(&mlList, "__PROCESSINGINFO__");
2060 }
2061
2062 //______________________________________________________________________________________________
2063 Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
2064                 TObjArray& entries)
2065 {
2066         // Query DAQ's Shuttle logbook and fills detector status object.
2067         // Call QueryRunParameters to query DAQ logbook for run parameters.
2068         //
2069
2070         entries.SetOwner(1);
2071
2072         // check connection, in case connect
2073         if (!Connect(3)) return kFALSE;
2074
2075         TString sqlQuery;
2076         sqlQuery = Form("select * from %s %s order by run", fConfig->GetShuttlelbTable(), whereClause);
2077
2078         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
2079         if (!aResult) {
2080                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
2081                 return kFALSE;
2082         }
2083
2084         AliDebug(2,Form("Query = %s", sqlQuery.Data()));
2085
2086         if(aResult->GetRowCount() == 0) {
2087                 Log("SHUTTLE", "No entries in Shuttle Logbook match request");
2088                 delete aResult;
2089                 return kTRUE;
2090         }
2091
2092         // TODO Check field count!
2093         const UInt_t nCols = 23;
2094         if (aResult->GetFieldCount() != (Int_t) nCols) {
2095                 Log("SHUTTLE", "Invalid SQL result field number!");
2096                 delete aResult;
2097                 return kFALSE;
2098         }
2099
2100         TSQLRow* aRow;
2101         while ((aRow = aResult->Next())) {
2102                 TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
2103                 Int_t run = runString.Atoi();
2104
2105                 AliShuttleLogbookEntry *entry = QueryRunParameters(run);
2106                 if (!entry)
2107                         continue;
2108
2109                 // loop on detectors
2110                 for(UInt_t ii = 0; ii < nCols; ii++)
2111                         entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii));
2112
2113                 entries.AddLast(entry);
2114                 delete aRow;
2115         }
2116
2117         delete aResult;
2118         return kTRUE;
2119 }
2120
2121 //______________________________________________________________________________________________
2122 AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
2123 {
2124         //
2125         // Retrieve run parameters written in the DAQ logbook and sets them into AliShuttleLogbookEntry object
2126         //
2127
2128         // check connection, in case connect
2129         if (!Connect(3))
2130                 return 0;
2131
2132         TString sqlQuery;
2133         sqlQuery.Form("select * from %s where run=%d", fConfig->GetDAQlbTable(), run);
2134
2135         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
2136         if (!aResult) {
2137                 Log("SHUTTLE", Form("Can't execute query <%s>!", sqlQuery.Data()));
2138                 return 0;
2139         }
2140
2141         if (aResult->GetRowCount() == 0) {
2142                 Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run));
2143                 delete aResult;
2144                 return 0;
2145         }
2146
2147         if (aResult->GetRowCount() > 1) {
2148                 Log("SHUTTLE", Form("QueryRunParameters - UNEXPECTED: "
2149                                 "more than one entry in DAQ Logbook for run %d!", run));
2150                 delete aResult;
2151                 return 0;
2152         }
2153
2154         TSQLRow* aRow = aResult->Next();
2155         if (!aRow)
2156         {
2157                 Log("SHUTTLE", Form("QueryRunParameters - Could not retrieve row for run %d. Skipping", run));
2158                 delete aResult;
2159                 return 0;
2160         }
2161
2162         AliShuttleLogbookEntry* entry = new AliShuttleLogbookEntry(run);
2163
2164         for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
2165                 entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
2166
2167         UInt_t startTime = entry->GetStartTime();
2168         UInt_t endTime = entry->GetEndTime();
2169
2170 //      if (!startTime || !endTime || startTime > endTime) 
2171 //      {
2172 //              Log("SHUTTLE",
2173 //                      Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d. Skipping!",
2174 //                              run, startTime, endTime));              
2175 //              
2176 //              Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
2177 //              fLogbookEntry = entry;  
2178 //              if (!UpdateShuttleLogbook("shuttle_done"))
2179 //              {
2180 //                      AliError(Form("Could not update logbook for run %d !", run));
2181 //              }
2182 //              fLogbookEntry = 0;
2183 //                              
2184 //              delete entry;
2185 //              delete aRow;
2186 //              delete aResult;
2187 //              return 0;
2188 //      }
2189
2190         if (!startTime) 
2191         {
2192                 Log("SHUTTLE",
2193                         Form("QueryRunParameters - Invalid parameters for Run %d: " 
2194                                 "startTime = %d, endTime = %d. Skipping!",
2195                                         run, startTime, endTime));              
2196                 
2197                 Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
2198                 fLogbookEntry = entry;  
2199                 if (!UpdateShuttleLogbook("shuttle_ignored"))
2200                 {
2201                         AliError(Form("Could not update logbook for run %d !", run));
2202                 }
2203                 fLogbookEntry = 0;
2204                                 
2205                 delete entry;
2206                 delete aRow;
2207                 delete aResult;
2208                 return 0;
2209         }
2210         
2211         if (startTime && !endTime) 
2212         {
2213                 // TODO Here we don't mark SHUTTLE done, because this may mean 
2214                 //the run is still ongoing!!            
2215                 Log("SHUTTLE",
2216                         Form("QueryRunParameters - Invalid parameters for Run %d: "
2217                              "startTime = %d, endTime = %d. Skipping (Shuttle won't be marked as DONE)!",
2218                                         run, startTime, endTime));              
2219                 
2220                 //Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
2221                 //fLogbookEntry = entry;        
2222                 //if (!UpdateShuttleLogbook("shuttle_done"))
2223                 //{
2224                 //      AliError(Form("Could not update logbook for run %d !", run));
2225                 //}
2226                 //fLogbookEntry = 0;
2227                                 
2228                 delete entry;
2229                 delete aRow;
2230                 delete aResult;
2231                 return 0;
2232         }
2233                         
2234         if (startTime && endTime && (startTime > endTime)) 
2235         {
2236                 Log("SHUTTLE",
2237                         Form("QueryRunParameters - Invalid parameters for Run %d: "
2238                                 "startTime = %d, endTime = %d. Skipping!",
2239                                         run, startTime, endTime));              
2240                 
2241                 Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
2242                 fLogbookEntry = entry;  
2243                 if (!UpdateShuttleLogbook("shuttle_ignored"))
2244                 {
2245                         AliError(Form("Could not update logbook for run %d !", run));
2246                 }
2247                 fLogbookEntry = 0;
2248                                 
2249                 delete entry;
2250                 delete aRow;
2251                 delete aResult;
2252                 return 0;
2253         }
2254                         
2255         TString totEventsStr = entry->GetRunParameter("totalEvents");  
2256         Int_t totEvents = totEventsStr.Atoi();
2257         if (totEvents < 1) 
2258         {
2259                 Log("SHUTTLE",
2260                         Form("QueryRunParameters - Run %d has 0 events - Skipping!", run));             
2261                 
2262                 Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));           
2263                 fLogbookEntry = entry;  
2264                 if (!UpdateShuttleLogbook("shuttle_ignored"))
2265                 {
2266                         AliError(Form("Could not update logbook for run %d !", run));
2267                 }
2268                 fLogbookEntry = 0;
2269                                 
2270                 delete entry;
2271                 delete aRow;
2272                 delete aResult;
2273                 return 0;
2274         }
2275
2276         delete aRow;
2277         delete aResult;
2278
2279         return entry;
2280 }
2281
2282 //______________________________________________________________________________________________
2283 TMap* AliShuttle::GetValueSet(const char* host, Int_t port, const TSeqCollection* entries,
2284                               DCSType type, Int_t multiSplit)
2285 {
2286         // Retrieve all "entry" data points from the DCS server
2287         // host, port: TSocket connection parameters
2288         // entries: list of name of the alias or data point
2289         // type: kAlias or kDP
2290         // returns TMap of values, 0 when failure
2291         
2292         AliDCSClient client(host, port, fTimeout, fRetries, multiSplit);
2293
2294         TMap* result = 0;
2295         if (type == kAlias)
2296         {
2297                 result = client.GetAliasValues(entries, GetCurrentStartTime(), 
2298                         GetCurrentEndTime());
2299         } 
2300         else if (type == kDP)
2301         {
2302                 result = client.GetDPValues(entries, GetCurrentStartTime(), 
2303                         GetCurrentEndTime());
2304         }
2305
2306         if (result == 0)
2307         {
2308                 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get entries! Reason: %s",
2309                         client.GetErrorString(client.GetResultErrorCode())));
2310                 if (client.GetResultErrorCode() == AliDCSClient::fgkServerError)        
2311                         Log(fCurrentDetector.Data(), Form("GetValueSet - Server error code: %s",
2312                                 client.GetServerError().Data()));
2313
2314                 return 0;
2315         }
2316                 
2317         return result;
2318 }
2319
2320 //______________________________________________________________________________________________
2321 const char* AliShuttle::GetFile(Int_t system, const char* detector,
2322                 const char* id, const char* source)
2323 {
2324         // Get calibration file from file exchange servers
2325         // First queris the FXS database for the file name, using the run, detector, id and source info
2326         // then calls RetrieveFile(filename) for actual copy to local disk
2327         // run: current run being processed (given by Logbook entry fLogbookEntry)
2328         // detector: the Preprocessor name
2329         // id: provided as a parameter by the Preprocessor
2330         // source: provided by the Preprocessor through GetFileSources function
2331
2332         // check if test mode should simulate a FXS error
2333         if (fTestMode & kErrorFXSFiles)
2334         {
2335                 Log(detector, Form("GetFile - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2336                 return 0;
2337         }
2338         
2339         // check connection, in case connect
2340         if (!Connect(system))
2341         {
2342                 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
2343                 return 0;
2344         }
2345
2346         // Query preparation
2347         TString sourceName(source);
2348         Int_t nFields = 3;
2349         TString sqlQueryStart = Form("select filePath,size,fileChecksum from %s where",
2350                                                                 fConfig->GetFXSdbTable(system));
2351         TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
2352                                                                 GetCurrentRun(), detector, id);
2353
2354         if (system == kDAQ)
2355         {
2356                 whereClause += Form(" and DAQsource=\"%s\"", source);
2357         }
2358         else if (system == kDCS)
2359         {
2360                 sourceName="none";
2361         }
2362         else if (system == kHLT)
2363         {
2364                 whereClause += Form(" and DDLnumbers=\"%s\"", source);
2365                 nFields = 3;
2366         }
2367
2368         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2369
2370         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2371
2372         // Query execution
2373         TSQLResult* aResult = 0;
2374         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2375         if (!aResult) {
2376                 Log(detector, Form("GetFileName - Can't execute SQL query to %s database for: id = %s, source = %s",
2377                                 GetSystemName(system), id, sourceName.Data()));
2378                 return 0;
2379         }
2380
2381         if(aResult->GetRowCount() == 0)
2382         {
2383                 Log(detector,
2384                         Form("GetFileName - No entry in %s FXS db for: id = %s, source = %s",
2385                                 GetSystemName(system), id, sourceName.Data()));
2386                 delete aResult;
2387                 return 0;
2388         }
2389
2390         if (aResult->GetRowCount() > 1) {
2391                 Log(detector,
2392                         Form("GetFileName - More than one entry in %s FXS db for: id = %s, source = %s",
2393                                 GetSystemName(system), id, sourceName.Data()));
2394                 delete aResult;
2395                 return 0;
2396         }
2397
2398         if (aResult->GetFieldCount() != nFields) {
2399                 Log(detector,
2400                         Form("GetFileName - Wrong field count in %s FXS db for: id = %s, source = %s",
2401                                 GetSystemName(system), id, sourceName.Data()));
2402                 delete aResult;
2403                 return 0;
2404         }
2405
2406         TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
2407
2408         if (!aRow){
2409                 Log(detector, Form("GetFileName - Empty set result in %s FXS db from query: id = %s, source = %s",
2410                                 GetSystemName(system), id, sourceName.Data()));
2411                 delete aResult;
2412                 return 0;
2413         }
2414
2415         TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
2416         TString fileSize(aRow->GetField(1), aRow->GetFieldLength(1));
2417         TString fileChecksum(aRow->GetField(2), aRow->GetFieldLength(2));
2418
2419         delete aResult;
2420         delete aRow;
2421
2422         AliDebug(2, Form("filePath = %s; size = %s, fileChecksum = %s",
2423                                 filePath.Data(), fileSize.Data(), fileChecksum.Data()));
2424
2425         // retrieved file is renamed to make it unique
2426         TString localFileName = Form("%s/%s_%d_process/%s_%s_%d_%s_%s.shuttle",
2427                                         GetShuttleTempDir(), detector, GetCurrentRun(),
2428                                         GetSystemName(system), detector, GetCurrentRun(), 
2429                                         id, sourceName.Data());
2430
2431
2432         // file retrieval from FXS
2433         UInt_t nRetries = 0;
2434         UInt_t maxRetries = 3;
2435         Bool_t result = kFALSE;
2436
2437         // copy!! if successful TSystem::Exec returns 0
2438         while(nRetries++ < maxRetries) {
2439                 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
2440                 result = RetrieveFile(system, filePath.Data(), localFileName.Data());
2441                 if(!result)
2442                 {
2443                         Log(detector, Form("GetFileName - Copy of file %s from %s FXS failed",
2444                                         filePath.Data(), GetSystemName(system)));
2445                         continue;
2446                 } 
2447
2448                 if (fileChecksum.Length()>0)
2449                 {
2450                         // compare md5sum of local file with the one stored in the FXS DB
2451                         Int_t md5Comp = gSystem->Exec(Form("md5sum %s |grep %s 2>&1 > /dev/null",
2452                                                 localFileName.Data(), fileChecksum.Data()));
2453
2454                         if (md5Comp != 0)
2455                         {
2456                                 Log(detector, Form("GetFileName - md5sum of file %s does not match with local copy!",
2457                                                         filePath.Data()));
2458                                 result = kFALSE;
2459                                 continue;
2460                         }
2461                 } else {
2462                         Log(fCurrentDetector, Form("GetFile - md5sum of file %s not set in %s database, skipping comparison",
2463                                                         filePath.Data(), GetSystemName(system)));
2464                 }
2465                 if (result) break;
2466         }
2467
2468         if(!result) return 0;
2469
2470         fFXSCalled[system]=kTRUE;
2471         TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, sourceName.Data()));
2472         fFXSlist[system].Add(fileParams);
2473
2474         static TString staticLocalFileName;
2475         staticLocalFileName.Form("%s", localFileName.Data());
2476         
2477         Log(fCurrentDetector, Form("GetFile - Retrieved file with id %s and "
2478                         "source %s from %s to %s", id, source, 
2479                         GetSystemName(system), localFileName.Data()));
2480                         
2481         return staticLocalFileName.Data();
2482 }
2483
2484 //______________________________________________________________________________________________
2485 Bool_t AliShuttle::RetrieveFile(UInt_t system, const char* fxsFileName, const char* localFileName)
2486 {
2487         //
2488         // Copies file from FXS to local Shuttle machine
2489         //
2490
2491         // check temp directory: trying to cd to temp; if it does not exist, create it
2492         AliDebug(2, Form("Copy file %s from %s FXS into %s",
2493                         GetSystemName(system), fxsFileName, localFileName));
2494                         
2495         TString tmpDir(localFileName);
2496         
2497         tmpDir = tmpDir(0,tmpDir.Last('/'));
2498
2499         Int_t noDir = gSystem->GetPathInfo(tmpDir.Data(), 0, (Long64_t*) 0, 0, 0);
2500         if (noDir) // temp dir does not exists!
2501         {
2502                 if (gSystem->mkdir(tmpDir.Data(), 1))
2503                 {
2504                         Log(fCurrentDetector.Data(), "RetrieveFile - could not make temp directory!!");
2505                         return kFALSE;
2506                 }
2507         }
2508
2509         TString baseFXSFolder;
2510         if (system == kDAQ)
2511         {
2512                 baseFXSFolder = "FES/";
2513         }
2514         else if (system == kDCS)
2515         {
2516                 baseFXSFolder = "";
2517         }
2518         else if (system == kHLT)
2519         {
2520                 baseFXSFolder = "/opt/FXS/";
2521         }
2522
2523
2524         TString command = Form("scp -oPort=%d -2 %s@%s:%s%s %s",
2525                 fConfig->GetFXSPort(system),
2526                 fConfig->GetFXSUser(system),
2527                 fConfig->GetFXSHost(system),
2528                 baseFXSFolder.Data(),
2529                 fxsFileName,
2530                 localFileName);
2531
2532         AliDebug(2, Form("%s",command.Data()));
2533
2534         Bool_t result = (gSystem->Exec(command.Data()) == 0);
2535
2536         return result;
2537 }
2538
2539 //______________________________________________________________________________________________
2540 TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
2541 {
2542         //
2543         // Get sources producing the condition file Id from file exchange servers
2544         // if id is NULL all sources are returned (distinct)
2545         //
2546
2547         if (id)
2548         {
2549                 Log(detector, Form("GetFileSources - Querying %s FXS for files with id %s produced by %s", GetSystemName(system), id, detector));
2550         } else {
2551                 Log(detector, Form("GetFileSources - Querying %s FXS for files produced by %s", GetSystemName(system), detector));
2552         }
2553         
2554         // check if test mode should simulate a FXS error
2555         if (fTestMode & kErrorFXSSources)
2556         {
2557                 Log(detector, Form("GetFileSources - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2558                 return 0;
2559         }
2560
2561         if (system == kDCS)
2562         {
2563                 Log(detector, "GetFileSources - WARNING: DCS system has only one source of data!");
2564                 TList *list = new TList();
2565                 list->SetOwner(1);
2566                 list->Add(new TObjString(" "));
2567                 return list;
2568         }
2569
2570         // check connection, in case connect
2571         if (!Connect(system))
2572         {
2573                 Log(detector, Form("GetFileSources - Couldn't connect to %s FXS database", GetSystemName(system)));
2574                 return NULL;
2575         }
2576
2577         TString sourceName = 0;
2578         if (system == kDAQ)
2579         {
2580                 sourceName = "DAQsource";
2581         } else if (system == kHLT)
2582         {
2583                 sourceName = "DDLnumbers";
2584         }
2585
2586         TString sqlQueryStart = Form("select distinct %s from %s where", sourceName.Data(), fConfig->GetFXSdbTable(system));
2587         TString whereClause = Form("run=%d and detector=\"%s\"",
2588                                 GetCurrentRun(), detector);
2589         if (id)
2590                 whereClause += Form(" and fileId=\"%s\"", id);
2591         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2592
2593         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2594
2595         // Query execution
2596         TSQLResult* aResult;
2597         aResult = fServer[system]->Query(sqlQuery);
2598         if (!aResult) {
2599                 Log(detector, Form("GetFileSources - Can't execute SQL query to %s database for id: %s",
2600                                 GetSystemName(system), id));
2601                 return 0;
2602         }
2603
2604         TList *list = new TList();
2605         list->SetOwner(1);
2606         
2607         if (aResult->GetRowCount() == 0)
2608         {
2609                 Log(detector,
2610                         Form("GetFileSources - No entry in %s FXS table for id: %s", GetSystemName(system), id));
2611                 delete aResult;
2612                 return list;
2613         }
2614
2615         Log(detector, Form("GetFileSources - Found %d sources", aResult->GetRowCount()));
2616
2617         TSQLRow* aRow;
2618         while ((aRow = aResult->Next()))
2619         {
2620
2621                 TString source(aRow->GetField(0), aRow->GetFieldLength(0));
2622                 AliDebug(2, Form("%s = %s", sourceName.Data(), source.Data()));
2623                 list->Add(new TObjString(source));
2624                 delete aRow;
2625         }
2626
2627         delete aResult;
2628
2629         return list;
2630 }
2631
2632 //______________________________________________________________________________________________
2633 TList* AliShuttle::GetFileIDs(Int_t system, const char* detector, const char* source)
2634 {
2635         //
2636         // Get all ids of condition files produced by a given source from file exchange servers
2637         //
2638         
2639         Log(detector, Form("GetFileIDs - Retrieving ids with source %s with %s", source, GetSystemName(system)));
2640
2641         // check if test mode should simulate a FXS error
2642         if (fTestMode & kErrorFXSSources)
2643         {
2644                 Log(detector, Form("GetFileIDs - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2645                 return 0;
2646         }
2647
2648         // check connection, in case connect
2649         if (!Connect(system))
2650         {
2651                 Log(detector, Form("GetFileIDs - Couldn't connect to %s FXS database", GetSystemName(system)));
2652                 return NULL;
2653         }
2654
2655         TString sourceName = 0;
2656         if (system == kDAQ)
2657         {
2658                 sourceName = "DAQsource";
2659         } else if (system == kHLT)
2660         {
2661                 sourceName = "DDLnumbers";
2662         }
2663
2664         TString sqlQueryStart = Form("select fileId from %s where", fConfig->GetFXSdbTable(system));
2665         TString whereClause = Form("run=%d and detector=\"%s\"",
2666                                 GetCurrentRun(), detector);
2667         if (sourceName.Length() > 0 && source)
2668                 whereClause += Form(" and %s=\"%s\"", sourceName.Data(), source);
2669         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2670
2671         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2672
2673         // Query execution
2674         TSQLResult* aResult;
2675         aResult = fServer[system]->Query(sqlQuery);
2676         if (!aResult) {
2677                 Log(detector, Form("GetFileIDs - Can't execute SQL query to %s database for source: %s",
2678                                 GetSystemName(system), source));
2679                 return 0;
2680         }
2681
2682         TList *list = new TList();
2683         list->SetOwner(1);
2684         
2685         if (aResult->GetRowCount() == 0)
2686         {
2687                 Log(detector,
2688                         Form("GetFileIDs - No entry in %s FXS table for source: %s", GetSystemName(system), source));
2689                 delete aResult;
2690                 return list;
2691         }
2692
2693         Log(detector, Form("GetFileIDs - Found %d ids", aResult->GetRowCount()));
2694
2695         TSQLRow* aRow;
2696
2697         while ((aRow = aResult->Next()))
2698         {
2699
2700                 TString id(aRow->GetField(0), aRow->GetFieldLength(0));
2701                 AliDebug(2, Form("fileId = %s", id.Data()));
2702                 list->Add(new TObjString(id));
2703                 delete aRow;
2704         }
2705
2706         delete aResult;
2707
2708         return list;
2709 }
2710
2711 //______________________________________________________________________________________________
2712 Bool_t AliShuttle::Connect(Int_t system)
2713 {
2714         // Connect to MySQL Server of the system's FXS MySQL databases
2715         // DAQ Logbook, Shuttle Logbook and DAQ FXS db are on the same host
2716         //
2717
2718         // check connection: if already connected return
2719         if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
2720
2721         TString dbHost, dbUser, dbPass, dbName;
2722
2723         if (system < 3) // FXS db servers
2724         {
2725                 dbHost = Form("mysql://%s:%d", fConfig->GetFXSdbHost(system), fConfig->GetFXSdbPort(system));
2726                 dbUser = fConfig->GetFXSdbUser(system);
2727                 dbPass = fConfig->GetFXSdbPass(system);
2728                 dbName =   fConfig->GetFXSdbName(system);
2729         } else { // Run & Shuttle logbook servers
2730         // TODO Will the Shuttle logbook server be the same as the Run logbook server ???
2731                 dbHost = Form("mysql://%s:%d", fConfig->GetDAQlbHost(), fConfig->GetDAQlbPort());
2732                 dbUser = fConfig->GetDAQlbUser();
2733                 dbPass = fConfig->GetDAQlbPass();
2734                 dbName =   fConfig->GetDAQlbDB();
2735         }
2736
2737         fServer[system] = TSQLServer::Connect(dbHost.Data(), dbUser.Data(), dbPass.Data());
2738         if (!fServer[system] || !fServer[system]->IsConnected()) {
2739                 if(system < 3)
2740                 {
2741                 AliError(Form("Can't establish connection to FXS database for %s",
2742                                         AliShuttleInterface::GetSystemName(system)));
2743                 } else {
2744                 AliError("Can't establish connection to Run logbook.");
2745                 }
2746                 if(fServer[system]) delete fServer[system];
2747                 return kFALSE;
2748         }
2749
2750         // Get tables
2751         TSQLResult* aResult=0;
2752         switch(system){
2753                 case kDAQ:
2754                         aResult = fServer[kDAQ]->GetTables(dbName.Data());
2755                         break;
2756                 case kDCS:
2757                         aResult = fServer[kDCS]->GetTables(dbName.Data());
2758                         break;
2759                 case kHLT:
2760                         aResult = fServer[kHLT]->GetTables(dbName.Data());
2761                         break;
2762                 default:
2763                         aResult = fServer[3]->GetTables(dbName.Data());
2764                         break;
2765         }
2766
2767         delete aResult;
2768         return kTRUE;
2769 }
2770
2771 //______________________________________________________________________________________________
2772 Bool_t AliShuttle::UpdateTable()
2773 {
2774         //
2775         // Update FXS table filling time_processed field in all rows corresponding to current run and detector
2776         //
2777
2778         Bool_t result = kTRUE;
2779
2780         for (UInt_t system=0; system<3; system++)
2781         {
2782                 if(!fFXSCalled[system]) continue;
2783
2784                 // check connection, in case connect
2785                 if (!Connect(system))
2786                 {
2787                         Log(fCurrentDetector, Form("UpdateTable - Couldn't connect to %s FXS database", GetSystemName(system)));
2788                         result = kFALSE;
2789                         continue;
2790                 }
2791
2792                 TTimeStamp now; // now
2793
2794                 // Loop on FXS list entries
2795                 TIter iter(&fFXSlist[system]);
2796                 TObjString *aFXSentry=0;
2797                 while ((aFXSentry = dynamic_cast<TObjString*> (iter.Next())))
2798                 {
2799                         TString aFXSentrystr = aFXSentry->String();
2800                         TObjArray *aFXSarray = aFXSentrystr.Tokenize("#!?!#");
2801                         if (!aFXSarray || aFXSarray->GetEntries() != 2 )
2802                         {
2803                                 Log(fCurrentDetector, Form("UpdateTable - error updating %s FXS entry. Check string: <%s>",
2804                                         GetSystemName(system), aFXSentrystr.Data()));
2805                                 if(aFXSarray) delete aFXSarray;
2806                                 result = kFALSE;
2807                                 continue;
2808                         }
2809                         const char* fileId = ((TObjString*) aFXSarray->At(0))->GetName();
2810                         const char* source = ((TObjString*) aFXSarray->At(1))->GetName();
2811
2812                         TString whereClause;
2813                         if (system == kDAQ)
2814                         {
2815                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
2816                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
2817                         }
2818                         else if (system == kDCS)
2819                         {
2820                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\";",
2821                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId);
2822                         }
2823                         else if (system == kHLT)
2824                         {
2825                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\";",
2826                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
2827                         }
2828
2829                         delete aFXSarray;
2830
2831                         TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
2832                                                                 now.GetSec(), whereClause.Data());
2833
2834                         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2835
2836                         // Query execution
2837                         TSQLResult* aResult;
2838                         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2839                         if (!aResult)
2840                         {
2841                                 Log(fCurrentDetector, Form("UpdateTable - %s db: can't execute SQL query <%s>",
2842                                                                 GetSystemName(system), sqlQuery.Data()));
2843                                 result = kFALSE;
2844                                 continue;
2845                         }
2846                         delete aResult;
2847                 }
2848         }
2849
2850         return result;
2851 }
2852
2853 //______________________________________________________________________________________________
2854 Bool_t AliShuttle::UpdateTableFailCase()
2855 {
2856         // Update FXS table filling time_processed field in all rows corresponding to current run and detector
2857         // this is called in case the preprocessor is declared failed for the current run, because
2858         // the fields are updated only in case of success
2859
2860         Bool_t result = kTRUE;
2861
2862         for (UInt_t system=0; system<3; system++)
2863         {
2864                 // check connection, in case connect
2865                 if (!Connect(system))
2866                 {
2867                         Log(fCurrentDetector, Form("UpdateTableFailCase - Couldn't connect to %s FXS database",
2868                                                         GetSystemName(system)));
2869                         result = kFALSE;
2870                         continue;
2871                 }
2872
2873                 TTimeStamp now; // now
2874
2875                 // Loop on FXS list entries
2876
2877                 TString whereClause = Form("where run=%d and detector=\"%s\";",
2878                                                 GetCurrentRun(), fCurrentDetector.Data());
2879
2880
2881                 TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
2882                                                         now.GetSec(), whereClause.Data());
2883
2884                 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2885
2886                 // Query execution
2887                 TSQLResult* aResult;
2888                 aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2889                 if (!aResult)
2890                 {
2891                         Log(fCurrentDetector, Form("UpdateTableFailCase - %s db: can't execute SQL query <%s>",
2892                                                         GetSystemName(system), sqlQuery.Data()));
2893                         result = kFALSE;
2894                         continue;
2895                 }
2896                 delete aResult;
2897         }
2898
2899         return result;
2900 }
2901
2902 //______________________________________________________________________________________________
2903 Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
2904 {
2905         //
2906         // Update Shuttle logbook filling detector or shuttle_done column
2907         // ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
2908         //
2909
2910         // check connection, in case connect
2911         if(!Connect(3)){
2912                 Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook.");
2913                 return kFALSE;
2914         }
2915
2916         TString detName(detector);
2917         TString setClause;
2918         if (detName == "shuttle_done" || detName == "shuttle_ignored")
2919         {
2920                 setClause = "set shuttle_done=1";
2921
2922                 if (detName == "shuttle_done")
2923                 {
2924                         // Send the information to ML
2925                         TMonaLisaText  mlStatus("SHUTTLE_status", "Done");
2926
2927                         TList mlList;
2928                         mlList.Add(&mlStatus);
2929                 
2930                         TString mlID;
2931                         mlID.Form("%d", GetCurrentRun());
2932                         fMonaLisa->SendParameters(&mlList, mlID);
2933                 }
2934         } else {
2935                 TString statusStr(status);
2936                 if(statusStr.Contains("done", TString::kIgnoreCase) ||
2937                    statusStr.Contains("failed", TString::kIgnoreCase)){
2938                         setClause = Form("set %s=\"%s\"", detector, status);
2939                 } else {
2940                         Log("SHUTTLE",
2941                                 Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s",
2942                                         status, detector));
2943                         return kFALSE;
2944                 }
2945         }
2946
2947         TString whereClause = Form("where run=%d", GetCurrentRun());
2948
2949         TString sqlQuery = Form("update %s %s %s",
2950                                         fConfig->GetShuttlelbTable(), setClause.Data(), whereClause.Data());
2951
2952         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2953
2954         // Query execution
2955         TSQLResult* aResult;
2956         aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
2957         if (!aResult) {
2958                 Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data()));
2959                 return kFALSE;
2960         }
2961         delete aResult;
2962
2963         return kTRUE;
2964 }
2965
2966 //______________________________________________________________________________________________
2967 Int_t AliShuttle::GetCurrentRun() const
2968 {
2969         //
2970         // Get current run from logbook entry
2971         //
2972
2973         return fLogbookEntry ? fLogbookEntry->GetRun() : -1;
2974 }
2975
2976 //______________________________________________________________________________________________
2977 UInt_t AliShuttle::GetCurrentStartTime() const
2978 {
2979         //
2980         // get current start time
2981         //
2982
2983         return fLogbookEntry ? fLogbookEntry->GetStartTime() : 0;
2984 }
2985
2986 //______________________________________________________________________________________________
2987 UInt_t AliShuttle::GetCurrentEndTime() const
2988 {
2989         //
2990         // get current end time from logbook entry
2991         //
2992
2993         return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
2994 }
2995
2996 //______________________________________________________________________________________________
2997 UInt_t AliShuttle::GetCurrentYear() const
2998 {
2999         //
3000         // Get current year from logbook entry
3001         //
3002
3003         if (!fLogbookEntry) return 0;
3004         
3005         TTimeStamp startTime(GetCurrentStartTime());
3006         TString year =  Form("%d",startTime.GetDate());
3007         year = year(0,4);
3008         
3009         return year.Atoi();
3010 }
3011
3012 //______________________________________________________________________________________________
3013 const char* AliShuttle::GetLHCPeriod() const
3014 {
3015         //
3016         // Get current LHC period from logbook entry
3017         //
3018
3019         if (!fLogbookEntry) return 0;
3020                 
3021         return fLogbookEntry->GetRunParameter("LHCperiod");
3022 }
3023
3024 //______________________________________________________________________________________________
3025 void AliShuttle::Log(const char* detector, const char* message)
3026 {
3027         //
3028         // Fill log string with a message
3029         //
3030
3031         TString logRunDir = GetShuttleLogDir();
3032         if (GetCurrentRun() >=0)
3033                 logRunDir += Form("/%d", GetCurrentRun());
3034         
3035         void* dir = gSystem->OpenDirectory(logRunDir.Data());
3036         if (dir == NULL) {
3037                 if (gSystem->mkdir(logRunDir.Data(), kTRUE)) {
3038                         AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
3039                         return;
3040                 }
3041
3042         } else {
3043                 gSystem->FreeDirectory(dir);
3044         }
3045
3046         TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
3047         if (GetCurrentRun() >= 0) 
3048                 toLog += Form("run %d - ", GetCurrentRun());
3049         toLog += Form("%s", message);
3050
3051         AliInfo(toLog.Data());
3052         
3053         // if we redirect the log output already to the file, leave here
3054         if (fOutputRedirected && strcmp(detector, "SHUTTLE") != 0)
3055                 return;
3056
3057         TString fileName = GetLogFileName(detector);
3058         
3059         gSystem->ExpandPathName(fileName);
3060
3061         ofstream logFile;
3062         logFile.open(fileName, ofstream::out | ofstream::app);
3063
3064         if (!logFile.is_open()) {
3065                 AliError(Form("Could not open file %s", fileName.Data()));
3066                 return;
3067         }
3068
3069         logFile << toLog.Data() << "\n";
3070
3071         logFile.close();
3072 }
3073
3074 //______________________________________________________________________________________________
3075 TString AliShuttle::GetLogFileName(const char* detector) const
3076 {
3077         // 
3078         // returns the name of the log file for a given sub detector
3079         //
3080         
3081         TString fileName;
3082         
3083         if (GetCurrentRun() >= 0) 
3084         {
3085                 fileName.Form("%s/%d/%s_%d.log", GetShuttleLogDir(), GetCurrentRun(), 
3086                         detector, GetCurrentRun());
3087         } else {
3088                 fileName.Form("%s/%s.log", GetShuttleLogDir(), detector);
3089         }
3090
3091         return fileName;
3092 }
3093
3094 //______________________________________________________________________________________________
3095 void AliShuttle::SendAlive()
3096 {
3097         // sends alive message to ML
3098         
3099         TMonaLisaText mlStatus("SHUTTLE_status", "Alive");
3100
3101         TList mlList;
3102         mlList.Add(&mlStatus);
3103
3104         fMonaLisa->SendParameters(&mlList, "__PROCESSINGINFO__");
3105 }
3106
3107 //______________________________________________________________________________________________
3108 Bool_t AliShuttle::Collect(Int_t run)
3109 {
3110         //
3111         // Collects conditions data for all UNPROCESSED run written to DAQ LogBook in case of run = -1 (default)
3112         // If a dedicated run is given this run is processed
3113         //
3114         // In operational mode, this is the Shuttle function triggered by the EOR signal.
3115         //
3116
3117         if (run == -1)
3118                 Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs");
3119         else
3120                 Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run));
3121
3122         SetLastAction("Starting");
3123
3124         // create ML instance
3125         if (!fMonaLisa)
3126                 fMonaLisa = new TMonaLisaWriter(fConfig->GetMonitorHost(), fConfig->GetMonitorTable());
3127                 
3128         SendAlive();
3129         CountOpenRuns();
3130
3131         TString whereClause("where shuttle_done=0");
3132         if (run != -1)
3133                 whereClause += Form(" and run=%d", run);
3134
3135         TObjArray shuttleLogbookEntries;
3136         if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries))
3137         {
3138                 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
3139                 return kFALSE;
3140         }
3141
3142         if (shuttleLogbookEntries.GetEntries() == 0)
3143         {
3144                 if (run == -1)
3145                         Log("SHUTTLE","Collect - Found no UNPROCESSED runs in Shuttle logbook");
3146                 else
3147                         Log("SHUTTLE", Form("Collect - Run %d is already DONE "
3148                                                 "or it does not exist in Shuttle logbook", run));
3149                 return kTRUE;
3150         }
3151
3152         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
3153                 fFirstUnprocessed[iDet] = kTRUE;
3154
3155         if (run != -1)
3156         {
3157                 // query Shuttle logbook for earlier runs, check if some detectors are unprocessed,
3158                 // flag them into fFirstUnprocessed array
3159                 TString whereClause(Form("where shuttle_done=0 and run < %d", run));
3160                 TObjArray tmpLogbookEntries;
3161                 if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries))
3162                 {
3163                         Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
3164                         return kFALSE;
3165                 }
3166
3167                 TIter iter(&tmpLogbookEntries);
3168                 AliShuttleLogbookEntry* anEntry = 0;
3169                 while ((anEntry = dynamic_cast<AliShuttleLogbookEntry*> (iter.Next())))
3170                 {
3171                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
3172                         {
3173                                 if (anEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
3174                                 {
3175                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
3176                                                         anEntry->GetRun(), GetDetName(iDet)));
3177                                         fFirstUnprocessed[iDet] = kFALSE;
3178                                 }
3179                         }
3180
3181                 }
3182
3183         }
3184
3185         if (!RetrieveConditionsData(shuttleLogbookEntries))
3186         {
3187                 Log("SHUTTLE", "Collect - Process of at least one run failed");
3188                 CountOpenRuns();
3189                 return kFALSE;
3190         }
3191
3192         Log("SHUTTLE", "Collect - Requested run(s) successfully processed");
3193         CountOpenRuns();
3194         return kTRUE;
3195 }
3196
3197 //______________________________________________________________________________________________
3198 Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries)
3199 {
3200         //
3201         // Retrieve conditions data for all runs that aren't processed yet
3202         //
3203
3204         Bool_t hasError = kFALSE;
3205
3206         TIter iter(&dateEntries);
3207         AliShuttleLogbookEntry* anEntry;
3208
3209         while ((anEntry = (AliShuttleLogbookEntry*) iter.Next())){
3210                 if (!Process(anEntry)){
3211                         hasError = kTRUE;
3212                 }
3213
3214                 // clean SHUTTLE temp directory
3215                 //TString filename = Form("%s/*.shuttle", GetShuttleTempDir());
3216                 //RemoveFile(filename.Data());
3217         }
3218
3219         return hasError == kFALSE;
3220 }
3221
3222 //______________________________________________________________________________________________
3223 ULong_t AliShuttle::GetTimeOfLastAction() const
3224 {
3225         //
3226         // Gets time of last action
3227         //
3228
3229         ULong_t tmp;
3230
3231         fMonitoringMutex->Lock();
3232
3233         tmp = fLastActionTime;
3234
3235         fMonitoringMutex->UnLock();
3236
3237         return tmp;
3238 }
3239
3240 //______________________________________________________________________________________________
3241 const TString AliShuttle::GetLastAction() const
3242 {
3243         //
3244         // returns a string description of the last action
3245         //
3246
3247         TString tmp;
3248
3249         fMonitoringMutex->Lock();
3250         
3251         tmp = fLastAction;
3252         
3253         fMonitoringMutex->UnLock();
3254
3255         return tmp;
3256 }
3257
3258 //______________________________________________________________________________________________
3259 void AliShuttle::SetLastAction(const char* action)
3260 {
3261         //
3262         // updates the monitoring variables
3263         //
3264
3265         fMonitoringMutex->Lock();
3266
3267         fLastAction = action;
3268         fLastActionTime = time(0);
3269         
3270         fMonitoringMutex->UnLock();
3271 }
3272
3273 //______________________________________________________________________________________________
3274 const char* AliShuttle::GetRunParameter(const char* param)
3275 {
3276         //
3277         // returns run parameter read from DAQ logbook
3278         //
3279
3280         if(!fLogbookEntry) {
3281                 AliError("No logbook entry!");
3282                 return 0;
3283         }
3284
3285         return fLogbookEntry->GetRunParameter(param);
3286 }
3287
3288 //______________________________________________________________________________________________
3289 AliCDBEntry* AliShuttle::GetFromOCDB(const char* detector, const AliCDBPath& path)
3290 {
3291         //
3292         // returns object from OCDB valid for current run
3293         //
3294
3295         if (fTestMode & kErrorOCDB)
3296         {
3297                 Log(detector, "GetFromOCDB - In TESTMODE - Simulating error with OCDB");
3298                 return 0;
3299         }
3300         
3301         AliCDBStorage *sto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
3302         if (!sto)
3303         {
3304                 Log(detector, "GetFromOCDB - Cannot activate main OCDB for query!");
3305                 return 0;
3306         }
3307
3308         return dynamic_cast<AliCDBEntry*> (sto->Get(path, GetCurrentRun()));
3309 }
3310
3311 //______________________________________________________________________________________________
3312 Bool_t AliShuttle::SendMail()
3313 {
3314         //
3315         // sends a mail to the subdetector expert in case of preprocessor error
3316         //
3317         
3318         if (fTestMode != kNone)
3319                 return kTRUE;
3320                 
3321         if (!fConfig->SendMail()) return kTRUE;
3322
3323         TString to="";
3324         TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
3325         TObjString *anExpert=0;
3326         while ((anExpert = (TObjString*) iterExperts.Next()))
3327         {
3328                 to += Form("%s,", anExpert->GetName());
3329         }
3330         if (to.Length() > 0)
3331           to.Remove(to.Length()-1);
3332         AliDebug(2, Form("to: %s",to.Data()));
3333
3334         if (to.IsNull()) {
3335                 Log("SHUTTLE", "List of detector responsibles not set!");
3336                 return kFALSE;
3337         }
3338
3339         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
3340         if (dir == NULL)
3341         {
3342                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
3343                 {
3344                         Log("SHUTTLE", Form("SendMail - Can't open directory <%s>", GetShuttleLogDir()));
3345                         return kFALSE;
3346                 }
3347
3348         } else {
3349                 gSystem->FreeDirectory(dir);
3350         }
3351
3352         TString bodyFileName;
3353         bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
3354         gSystem->ExpandPathName(bodyFileName);
3355
3356         ofstream mailBody;
3357         mailBody.open(bodyFileName, ofstream::out);
3358
3359         if (!mailBody.is_open())
3360         {
3361                 Log("SHUTTLE", Form("Could not open mail body file %s", bodyFileName.Data()));
3362                 return kFALSE;
3363         }
3364
3365         TString cc="";
3366         TIter iterAdmins(fConfig->GetAdmins(AliShuttleConfig::kGlobal));
3367         TObjString *anAdmin=0;
3368         while ((anAdmin = (TObjString*) iterAdmins.Next()))
3369         {
3370                 cc += Form("%s,", anAdmin->GetName());
3371         }
3372         if (cc.Length() > 0)
3373           cc.Remove(to.Length()-1);
3374         AliDebug(2, Form("cc: %s",to.Data()));
3375
3376         TString subject = Form("%s Shuttle preprocessor FAILED in run %d (run type = %s)!",
3377                                 fCurrentDetector.Data(), GetCurrentRun(), GetRunType());
3378         AliDebug(2, Form("subject: %s", subject.Data()));
3379
3380         TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
3381         body += Form("SHUTTLE just detected that your preprocessor "
3382                         "failed processing run %d (run type = %s)!!\n\n", 
3383                                         GetCurrentRun(), GetRunType());
3384         body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n", 
3385                                 fCurrentDetector.Data());
3386         if (fConfig->GetRunMode() == AliShuttleConfig::kTest)
3387         {
3388                 body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
3389         } else {
3390                 body += Form("\thttp://pcalimonitor.cern.ch/shuttle.jsp?instance=PROD&time=168 \n\n");
3391         }
3392         
3393         
3394         TString logFolder = "logs";
3395         if (fConfig->GetRunMode() == AliShuttleConfig::kProd) 
3396                 logFolder += "_PROD";
3397         
3398         
3399         body += Form("Find the %s log for the current run on \n\n"
3400                 "\thttp://pcalishuttle01.cern.ch:8880/%s/%d/%s_%d.log \n\n", 
3401                 fCurrentDetector.Data(), logFolder.Data(), GetCurrentRun(), 
3402                                 fCurrentDetector.Data(), GetCurrentRun());
3403         body += Form("The last 10 lines of %s log file are following:\n\n", fCurrentDetector.Data());
3404
3405         AliDebug(2, Form("Body begin: %s", body.Data()));
3406
3407         mailBody << body.Data();
3408         mailBody.close();
3409         mailBody.open(bodyFileName, ofstream::out | ofstream::app);
3410
3411         TString logFileName = Form("%s/%d/%s_%d.log", GetShuttleLogDir(), 
3412                 GetCurrentRun(), fCurrentDetector.Data(), GetCurrentRun());
3413         TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
3414         if (gSystem->Exec(tailCommand.Data()))
3415         {
3416                 mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
3417         }
3418
3419         TString endBody = Form("------------------------------------------------------\n\n");
3420         endBody += Form("In case of problems please contact the SHUTTLE core team.\n\n");
3421         endBody += "Please do not answer this message directly, it is automatically generated.\n\n";
3422         endBody += "Greetings,\n\n \t\t\tthe SHUTTLE\n";
3423
3424         AliDebug(2, Form("Body end: %s", endBody.Data()));
3425
3426         mailBody << endBody.Data();
3427
3428         mailBody.close();
3429
3430         // send mail!
3431         TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
3432                                                 subject.Data(),
3433                                                 cc.Data(),
3434                                                 to.Data(),
3435                                                 bodyFileName.Data());
3436         AliDebug(2, Form("mail command: %s", mailCommand.Data()));
3437
3438         Bool_t result = gSystem->Exec(mailCommand.Data());
3439
3440         return result == 0;
3441 }
3442
3443 //______________________________________________________________________________________________
3444 Bool_t AliShuttle::SendMailToDCS()
3445 {
3446         //
3447         // sends a mail to the DCS Amanda experts in case of DCS data point retrieval error
3448         //
3449         
3450         if (fTestMode != kNone)
3451                 return kTRUE;
3452
3453         if (!fConfig->SendMail()) return kTRUE;
3454
3455         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
3456         if (dir == NULL)
3457         {
3458                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
3459                 {
3460                         Log("SHUTTLE", Form("SendMailToDCS - Can't open directory <%s>", GetShuttleLogDir()));
3461                         return kFALSE;
3462                 }
3463
3464         } else {
3465                 gSystem->FreeDirectory(dir);
3466         }
3467
3468         TString bodyFileName;
3469         bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
3470         gSystem->ExpandPathName(bodyFileName);
3471
3472         ofstream mailBody;
3473         mailBody.open(bodyFileName, ofstream::out);
3474
3475         if (!mailBody.is_open())
3476         {
3477                 Log("SHUTTLE", Form("SendMailToDCS - Could not open mail body file %s", bodyFileName.Data()));
3478                 return kFALSE;
3479         }
3480
3481         TString to="";
3482         TIter iterExperts(fConfig->GetAdmins(AliShuttleConfig::kAmanda));
3483         TObjString *anExpert=0;
3484         while ((anExpert = (TObjString*) iterExperts.Next()))
3485         {
3486                 to += Form("%s,", anExpert->GetName());
3487         }
3488         if (to.Length() > 0)
3489           to.Remove(to.Length()-1);
3490         AliDebug(2, Form("to: %s",to.Data()));
3491
3492         if (to.IsNull()) {
3493                 Log("SHUTTLE", "List of Amanda server administrators not set!");
3494                 return kFALSE;
3495         }
3496
3497         TString cc="";
3498         TIter iterAdmins(fConfig->GetAdmins(AliShuttleConfig::kGlobal));
3499         TObjString *anAdmin=0;
3500         while ((anAdmin = (TObjString*) iterAdmins.Next()))
3501         {
3502                 cc += Form("%s,", anAdmin->GetName());
3503         }
3504         if (cc.Length() > 0)
3505           cc.Remove(to.Length()-1);
3506         AliDebug(2, Form("cc: %s",to.Data()));
3507
3508         TString subject = Form("Retrieval of data points for %s FAILED in run %d !",
3509                                 fCurrentDetector.Data(), GetCurrentRun());
3510         AliDebug(2, Form("subject: %s", subject.Data()));
3511
3512         TString body = Form("Dear DCS experts, \n\n");
3513         body += Form("SHUTTLE couldn\'t retrieve the data points for detector %s "
3514                         "in run %d!!\n\n", fCurrentDetector.Data(), GetCurrentRun());
3515         body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n", 
3516                                 fCurrentDetector.Data());
3517         if (fConfig->GetRunMode() == AliShuttleConfig::kTest)
3518         {
3519                 body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
3520         } else {
3521                 body += Form("\thttp://pcalimonitor.cern.ch/shuttle.jsp?instance=PROD?time=168 \n\n");
3522         }
3523
3524         TString logFolder = "logs";
3525         if (fConfig->GetRunMode() == AliShuttleConfig::kProd) 
3526                 logFolder += "_PROD";
3527         
3528         
3529         body += Form("Find the %s log for the current run on \n\n"
3530                 "\thttp://pcalishuttle01.cern.ch:8880/%s/%d/%s_%d.log \n\n", 
3531                 fCurrentDetector.Data(), logFolder.Data(), GetCurrentRun(), 
3532                                 fCurrentDetector.Data(), GetCurrentRun());
3533         body += Form("The last 10 lines of %s log file are following:\n\n", fCurrentDetector.Data());
3534
3535         AliDebug(2, Form("Body begin: %s", body.Data()));
3536
3537         mailBody << body.Data();
3538         mailBody.close();
3539         mailBody.open(bodyFileName, ofstream::out | ofstream::app);
3540
3541         TString logFileName = Form("%s/%d/%s_%d.log", GetShuttleLogDir(), GetCurrentRun(),
3542                 fCurrentDetector.Data(), GetCurrentRun());
3543         TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
3544         if (gSystem->Exec(tailCommand.Data()))
3545         {
3546                 mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
3547         }
3548
3549         TString endBody = Form("------------------------------------------------------\n\n");
3550         endBody += Form("In case of problems please contact the SHUTTLE core team.\n\n");
3551         endBody += "Please do not answer this message directly, it is automatically generated.\n\n";
3552         endBody += "Greetings,\n\n \t\t\tthe SHUTTLE\n";
3553
3554         AliDebug(2, Form("Body end: %s", endBody.Data()));
3555
3556         mailBody << endBody.Data();
3557
3558         mailBody.close();
3559
3560         // send mail!
3561         TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
3562                                                 subject.Data(),
3563                                                 cc.Data(),
3564                                                 to.Data(),
3565                                                 bodyFileName.Data());
3566         AliDebug(2, Form("mail command: %s", mailCommand.Data()));
3567
3568         Bool_t result = gSystem->Exec(mailCommand.Data());
3569
3570         return result == 0;
3571 }
3572
3573 //______________________________________________________________________________________________
3574 const char* AliShuttle::GetRunType()
3575 {
3576         //
3577         // returns run type read from "run type" logbook
3578         //
3579
3580         if(!fLogbookEntry) {
3581                 AliError("No logbook entry!");
3582                 return 0;
3583         }
3584
3585         return fLogbookEntry->GetRunType();
3586 }
3587
3588 //______________________________________________________________________________________________
3589 Bool_t AliShuttle::GetHLTStatus()
3590 {
3591         // Return HLT status (ON=1 OFF=0)
3592         // Converts the HLT status from the status string read in the run logbook (not just a bool)
3593
3594         if(!fLogbookEntry) {
3595                 AliError("No logbook entry!");
3596                 return 0;
3597         }
3598
3599         // TODO implement when HLTStatus is inserted in run logbook
3600         //TString hltStatus = fLogbookEntry->GetRunParameter("HLTStatus");
3601         //if(hltStatus == "OFF") {return kFALSE};
3602
3603         return kTRUE;
3604 }
3605
3606 //______________________________________________________________________________________________
3607 void AliShuttle::SetShuttleTempDir(const char* tmpDir)
3608 {
3609         //
3610         // sets Shuttle temp directory
3611         //
3612
3613         fgkShuttleTempDir = gSystem->ExpandPathName(tmpDir);
3614 }
3615
3616 //______________________________________________________________________________________________
3617 void AliShuttle::SetShuttleLogDir(const char* logDir)
3618 {
3619         //
3620         // sets Shuttle log directory
3621         //
3622
3623         fgkShuttleLogDir = gSystem->ExpandPathName(logDir);
3624 }