]> git.uio.no Git - u/mrichter/AliRoot.git/blob - SHUTTLE/AliShuttle.cxx
More meaningful log message added in GetFileSources
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttle.cxx
1 /**************************************************************************
2  * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
3  *                                                                        *
4  * Author: The ALICE Off-line Project.                                    *
5  * Contributors are mentioned in the code where appropriate.              *
6  *                                                                        *
7  * Permission to use, copy, modify and distribute this software and its   *
8  * documentation strictly for non-commercial purposes is hereby granted   *
9  * without fee, provided that the above copyright notice appears in all   *
10  * copies and that both the copyright notice and this permission notice   *
11  * appear in the supporting documentation. The authors make no claims     *
12  * about the suitability of this software for any purpose. It is          *
13  * provided "as is" without express or implied warranty.                  *
14  **************************************************************************/
15
16 /*
17 $Log$
18 Revision 1.76  2007/12/19 07:45:20  acolla
19 bug fix in the name of the raw tag files (Raw instead of raw)
20
21 Revision 1.75  2007/12/18 15:42:14  jgrosseo
22 adding number of open runs to monitoring
23
24 Revision 1.74  2007/12/17 03:23:32  jgrosseo
25 several bugfixes
26 added "empty preprocessor" as placeholder for Acorde in FDR
27
28 Revision 1.73  2007/12/14 19:31:36  acolla
29 Sending email to DCS experts is temporarily commented
30
31 Revision 1.72  2007/12/13 15:44:28  acolla
32 Run type added in mail sent to detector expert (eases understanding)
33
34 Revision 1.71  2007/12/12 14:56:14  jgrosseo
35 sending shuttle_ignore to ML also in case of 0 events
36
37 Revision 1.70  2007/12/12 13:45:35  acolla
38 Monalisa started in Collect() function. Alive message to monitor is sent at each Collect and every minute during preprocessor processing.
39
40 Revision 1.69  2007/12/12 10:06:29  acolla
41 in AliShuttle.cxx: SHUTTLE logbook is updated in case of invalid run times:
42
43 time_start==0 && time_end==0
44
45 logbook is NOT updated if time_start != 0 && time_end == 0, because it may mean that the run is still ongoing.
46
47 Revision 1.68  2007/12/11 10:15:17  acolla
48 Added marking SHUTTLE=DONE for invalid runs
49 (invalid start time or end time) and runs with totalEvents < 1
50
51 Revision 1.67  2007/12/07 19:14:36  acolla
52 in AliShuttleTrigger:
53
54 Added automatic collection of new runs on a regular time basis (settable from the configuration)
55
56 in AliShuttleConfig: new members
57
58 - triggerWait: time to wait for DIM trigger (s) before starting automatic collection of new runs
59 - mode: run mode (test, prod) -> used to build log folder (logs or logs_PROD)
60
61 in AliShuttle:
62
63 - logs now stored in logs/#RUN/DET_#RUN.log
64
65 Revision 1.66  2007/12/05 10:45:19  jgrosseo
66 changed order of arguments to TMonaLisaWriter
67
68 Revision 1.65  2007/11/26 16:58:37  acolla
69 Monalisa configuration added: host and table name
70
71 Revision 1.64  2007/11/13 16:15:47  acolla
72 DCS map is stored in a file in the temp folder where the detector is processed.
73 If the preprocessor fails, the temp folder is not removed. This will help the debugging of the problem.
74
75 Revision 1.63  2007/11/02 10:53:16  acolla
76 Protection added to AliShuttle::CopyFileLocally
77
78 Revision 1.62  2007/10/31 18:23:13  acolla
79 Furter developement on the Shuttle:
80
81 - Shuttle now connects to the Grid as alidaq. The OCDB and Reference folders
82 are now built from /alice/data, e.g.:
83 /alice/data/2007/LHC07a/OCDB
84
85 the year and LHC period are taken from the Shuttle.
86 Raw metadata files are stored by GRP to:
87 /alice/data/2007/LHC07a/<runNb>/Raw/RunMetadata.root
88
89 - Shuttle sends a mail to DCS experts each time DP retrieval fails.
90
91 Revision 1.61  2007/10/30 20:33:51  acolla
92 Improved managing of temporary folders, which weren't correctly handled.
93 Resolved bug introduced in StoreReferenceFile, which caused SPD preprocessor fail.
94
95 Revision 1.60  2007/10/29 18:06:16  acolla
96
97 New function StoreRunMetadataFile added to preprocessor and Shuttle interface
98 This function can be used by GRP only. It stores raw data tags merged file to the
99 raw data folder (e.g. /alice/data/2008/LHC08a/000099999/Raw).
100
101 KNOWN ISSUES:
102
103 1. Shuttle cannot write to /alice/data/ because it belongs to alidaq. Tag file is stored in /alice/simulation/... for the time being.
104 2. Due to a bug in TAlien::Mkdir, the creation of a folder in recursive mode (-p option) does not work. The problem
105 has been corrected in the root package on the Shuttle machine.
106
107 Revision 1.59  2007/10/05 12:40:55  acolla
108
109 Result error code added to AliDCSClient data members (it was "lost" with the new implementation of TMap* GetAliasValues and GetDPValues).
110
111 Revision 1.58  2007/09/28 15:27:40  acolla
112
113 AliDCSClient "multiSplit" option added in the DCS configuration
114 in AliDCSMessage: variable MAX_BODY_SIZE set to 500000
115
116 Revision 1.57  2007/09/27 16:53:13  acolla
117 Detectors can have more than one AMANDA server. SHUTTLE queries the servers sequentially,
118 merges the dcs aliases/DPs in one TMap and sends it to the preprocessor.
119
120 Revision 1.56  2007/09/14 16:46:14  jgrosseo
121 1) Connect and Close are called before and after each query, so one can
122 keep the same AliDCSClient object.
123 2) The splitting of a query is moved to GetDPValues/GetAliasValues.
124 3) Splitting interval can be specified in constructor
125
126 Revision 1.55  2007/08/06 12:26:40  acolla
127 Function Bool_t GetHLTStatus added to preprocessor. It returns the status of HLT
128 read from the run logbook.
129
130 Revision 1.54  2007/07/12 09:51:25  jgrosseo
131 removed duplicated log message in GetFile
132
133 Revision 1.53  2007/07/12 09:26:28  jgrosseo
134 updating hlt fxs base path
135
136 Revision 1.52  2007/07/12 08:06:45  jgrosseo
137 adding log messages in getfile... functions
138 adding not implemented copy constructor in alishuttleconfigholder
139
140 Revision 1.51  2007/07/03 17:24:52  acolla
141 root moved to v5-16-00. TFileMerger->Cp moved to TFile::Cp.
142
143 Revision 1.50  2007/07/02 17:19:32  acolla
144 preprocessor is run in a temp directory that is removed when process is finished.
145
146 Revision 1.49  2007/06/29 10:45:06  acolla
147 Number of columns in MySql Shuttle logbook increased by one (HLT added)
148
149 Revision 1.48  2007/06/21 13:06:19  acolla
150 GetFileSources returns dummy list with 1 source if system=DCS (better than
151 returning error as it was)
152
153 Revision 1.47  2007/06/19 17:28:56  acolla
154 HLT updated; missing map bug removed.
155
156 Revision 1.46  2007/06/09 13:01:09  jgrosseo
157 Switching to retrieval of several DCS DPs at a time (multiDPrequest)
158
159 Revision 1.45  2007/05/30 06:35:20  jgrosseo
160 Adding functionality to the Shuttle/TestShuttle:
161 o) Function to retrieve list of sources from a given system (GetFileSources with id=0)
162 o) Function to retrieve list of IDs for a given source      (GetFileIDs)
163 These functions are needed for dealing with the tag files that are saved for the GRP preprocessor
164 Example code has been added to the TestProcessor in TestShuttle
165
166 Revision 1.44  2007/05/11 16:09:32  acolla
167 Reference files for ITS, MUON and PHOS are now stored in OfflineDetName/OnlineDetName/run_...
168 example: ITS/SPD/100_filename.root
169
170 Revision 1.43  2007/05/10 09:59:51  acolla
171 Various bug fixes in StoreRefFilesToGrid; Cleaning of reference storage before processing detector (CleanReferenceStorage)
172
173 Revision 1.42  2007/05/03 08:01:39  jgrosseo
174 typo in last commit :-(
175
176 Revision 1.41  2007/05/03 08:00:48  jgrosseo
177 fixing log message when pp want to skip dcs value retrieval
178
179 Revision 1.40  2007/04/27 07:06:48  jgrosseo
180 GetFileSources returns empty list in case of no files, but successful query
181 No mails sent in testmode
182
183 Revision 1.39  2007/04/17 12:43:57  acolla
184 Correction in StoreOCDB; change of text in mail to detector expert
185
186 Revision 1.38  2007/04/12 08:26:18  jgrosseo
187 updated comment
188
189 Revision 1.37  2007/04/10 16:53:14  jgrosseo
190 redirecting sub detector stdout, stderr to sub detector log file
191
192 Revision 1.35  2007/04/04 16:26:38  acolla
193 1. Re-organization of function calls in TestPreprocessor to make it more meaningful.
194 2. Added missing dependency in test preprocessors.
195 3. in AliShuttle.cxx: processing time and memory consumption info on a single line.
196
197 Revision 1.34  2007/04/04 10:33:36  jgrosseo
198 1) Storing of files to the Grid is now done _after_ your preprocessors succeeded. This is transparent, which means that you can still use the same functions (Store, StoreReferenceData) to store files to the Grid. However, the Shuttle first stores them locally and transfers them after the preprocessor finished. The return code of these two functions has changed from UInt_t to Bool_t which gives you the success of the storing.
199 In case of an error with the Grid, the Shuttle will retry the storing later, the preprocessor does not need to be run again.
200
201 2) The meaning of the return code of the preprocessor has changed. 0 is now success and any other value means failure. This value is stored in the log and you can use it to keep details about the error condition.
202
203 3) New function StoreReferenceFile to _directly_ store a file (without opening it) to the reference storage.
204
205 4) The memory usage of the preprocessor is monitored. If it exceeds 2 GB it is terminated.
206
207 5) New function AliPreprocessor::ProcessDCS(). If you do not need to have DCS data in all cases, you can skip the processing by implemting this function and returning kFALSE under certain conditions. E.g. if there is a certain run type.
208 If you always need DCS data (like before), you do not need to implement it.
209
210 6) The run type has been added to the monitoring page
211
212 Revision 1.33  2007/04/03 13:56:01  acolla
213 Grid Storage at the end of preprocessing. Added virtual method to disable DCS query according to the
214 run type.
215
216 Revision 1.32  2007/02/28 10:41:56  acolla
217 Run type field added in SHUTTLE framework. Run type is read from "run type" logbook and retrieved by
218 AliPreprocessor::GetRunType() function.
219 Added some ldap definition files.
220
221 Revision 1.30  2007/02/13 11:23:21  acolla
222 Moved getters and setters of Shuttle's main OCDB/Reference, local
223 OCDB/Reference, temp and log folders to AliShuttleInterface
224
225 Revision 1.27  2007/01/30 17:52:42  jgrosseo
226 adding monalisa monitoring
227
228 Revision 1.26  2007/01/23 19:20:03  acolla
229 Removed old ldif files, added TOF, MCH ldif files. Added some options in
230 AliShuttleConfig::Print. Added in Ali Shuttle: SetShuttleTempDir and
231 SetShuttleLogDir
232
233 Revision 1.25  2007/01/15 19:13:52  acolla
234 Moved some AliInfo to AliDebug in SendMail function
235
236 Revision 1.21  2006/12/07 08:51:26  jgrosseo
237 update (alberto):
238 table, db names in ldap configuration
239 added GRP preprocessor
240 DCS data can also be retrieved by data point
241
242 Revision 1.20  2006/11/16 16:16:48  jgrosseo
243 introducing strict run ordering flag
244 removed giving preprocessor name to preprocessor, they have to know their name themselves ;-)
245
246 Revision 1.19  2006/11/06 14:23:04  jgrosseo
247 major update (Alberto)
248 o) reading of run parameters from the logbook
249 o) online offline naming conversion
250 o) standalone DCSclient package
251
252 Revision 1.18  2006/10/20 15:22:59  jgrosseo
253 o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
254 o) Merging Collect, CollectAll, CollectNew function
255 o) Removing implementation of empty copy constructors (declaration still there!)
256
257 Revision 1.17  2006/10/05 16:20:55  jgrosseo
258 adapting to new CDB classes
259
260 Revision 1.16  2006/10/05 15:46:26  jgrosseo
261 applying to the new interface
262
263 Revision 1.15  2006/10/02 16:38:39  jgrosseo
264 update (alberto):
265 fixed memory leaks
266 storing of objects that failed to be stored to the grid before
267 interfacing of shuttle status table in daq system
268
269 Revision 1.14  2006/08/29 09:16:05  jgrosseo
270 small update
271
272 Revision 1.13  2006/08/15 10:50:00  jgrosseo
273 effc++ corrections (alberto)
274
275 Revision 1.12  2006/08/08 14:19:29  jgrosseo
276 Update to shuttle classes (Alberto)
277
278 - Possibility to set the full object's path in the Preprocessor's and
279 Shuttle's  Store functions
280 - Possibility to extend the object's run validity in the same classes
281 ("startValidity" and "validityInfinite" parameters)
282 - Implementation of the StoreReferenceData function to store reference
283 data in a dedicated CDB storage.
284
285 Revision 1.11  2006/07/21 07:37:20  jgrosseo
286 last run is stored after each run
287
288 Revision 1.10  2006/07/20 09:54:40  jgrosseo
289 introducing status management: The processing per subdetector is divided into several steps,
290 after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
291 can keep track of the number of failures and skips further processing after a certain threshold is
292 exceeded. These thresholds can be configured in LDAP.
293
294 Revision 1.9  2006/07/19 10:09:55  jgrosseo
295 new configuration, accesst to DAQ FES (Alberto)
296
297 Revision 1.8  2006/07/11 12:44:36  jgrosseo
298 adding parameters for extended validity range of data produced by preprocessor
299
300 Revision 1.7  2006/07/10 14:37:09  jgrosseo
301 small fix + todo comment
302
303 Revision 1.6  2006/07/10 13:01:41  jgrosseo
304 enhanced storing of last sucessfully processed run (alberto)
305
306 Revision 1.5  2006/07/04 14:59:57  jgrosseo
307 revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
308
309 Revision 1.4  2006/06/12 09:11:16  jgrosseo
310 coding conventions (Alberto)
311
312 Revision 1.3  2006/06/06 14:26:40  jgrosseo
313 o) removed files that were moved to STEER
314 o) shuttle updated to follow the new interface (Alberto)
315
316 Revision 1.2  2006/03/07 07:52:34  hristov
317 New version (B.Yordanov)
318
319 Revision 1.6  2005/11/19 17:19:14  byordano
320 RetrieveDATEEntries and RetrieveConditionsData added
321
322 Revision 1.5  2005/11/19 11:09:27  byordano
323 AliShuttle declaration added
324
325 Revision 1.4  2005/11/17 17:47:34  byordano
326 TList changed to TObjArray
327
328 Revision 1.3  2005/11/17 14:43:23  byordano
329 import to local CVS
330
331 Revision 1.1.1.1  2005/10/28 07:33:58  hristov
332 Initial import as subdirectory in AliRoot
333
334 Revision 1.2  2005/09/13 08:41:15  byordano
335 default startTime endTime added
336
337 Revision 1.4  2005/08/30 09:13:02  byordano
338 some docs added
339
340 Revision 1.3  2005/08/29 21:15:47  byordano
341 some docs added
342
343 */
344
345 //
346 // This class is the main manager for AliShuttle.
347 // It organizes the data retrieval from DCS and calls the
348 // interface methods of AliPreprocessor.
349 // For every detector in AliShuttleConfig (see AliShuttleConfig),
350 // data for its set of aliases is retrieved. If there is a registered
351 // AliPreprocessor for this detector then it will be used
352 // according to the schema (see AliPreprocessor).
353 // If there isn't a registered AliPreprocessor then the retrieved
354 // data is stored automatically to the underlying AliCDBStorage.
355 // The alias name is used as detSpec.
356 //
357
358 #include "AliShuttle.h"
359
360 #include "AliCDBManager.h"
361 #include "AliCDBStorage.h"
362 #include "AliCDBId.h"
363 #include "AliCDBRunRange.h"
364 #include "AliCDBPath.h"
365 #include "AliCDBEntry.h"
366 #include "AliShuttleConfig.h"
367 #include "DCSClient/AliDCSClient.h"
368 #include "AliLog.h"
369 #include "AliPreprocessor.h"
370 #include "AliShuttleStatus.h"
371 #include "AliShuttleLogbookEntry.h"
372
373 #include <TSystem.h>
374 #include <TObject.h>
375 #include <TString.h>
376 #include <TTimeStamp.h>
377 #include <TObjString.h>
378 #include <TSQLServer.h>
379 #include <TSQLResult.h>
380 #include <TSQLRow.h>
381 #include <TMutex.h>
382 #include <TSystemDirectory.h>
383 #include <TSystemFile.h>
384 #include <TFile.h>
385 #include <TGrid.h>
386 #include <TGridResult.h>
387
388 #include <TMonaLisaWriter.h>
389
390 #include <fstream>
391
392 #include <sys/types.h>
393 #include <sys/wait.h>
394
395 ClassImp(AliShuttle)
396
397 //______________________________________________________________________________________________
398 AliShuttle::AliShuttle(const AliShuttleConfig* config,
399                 UInt_t timeout, Int_t retries):
400 fConfig(config),
401 fTimeout(timeout), fRetries(retries),
402 fPreprocessorMap(),
403 fLogbookEntry(0),
404 fCurrentDetector(),
405 fStatusEntry(0),
406 fMonitoringMutex(0),
407 fLastActionTime(0),
408 fLastAction(),
409 fMonaLisa(0),
410 fTestMode(kNone),
411 fReadTestMode(kFALSE),
412 fOutputRedirected(kFALSE)
413 {
414         //
415         // config: AliShuttleConfig used
416         // timeout: timeout used for AliDCSClient connection
417         // retries: the number of retries in case of connection error.
418         //
419
420         if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
421         for(int iSys=0;iSys<4;iSys++) {
422                 fServer[iSys]=0;
423                 if (iSys < 3)
424                         fFXSlist[iSys].SetOwner(kTRUE);
425         }
426         fPreprocessorMap.SetOwner(kTRUE);
427
428         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
429                 fFirstUnprocessed[iDet] = kFALSE;
430
431         fMonitoringMutex = new TMutex();
432 }
433
434 //______________________________________________________________________________________________
435 AliShuttle::~AliShuttle()
436 {
437         //
438         // destructor
439         //
440
441         fPreprocessorMap.DeleteAll();
442         for(int iSys=0;iSys<4;iSys++)
443                 if(fServer[iSys]) {
444                         fServer[iSys]->Close();
445                         delete fServer[iSys];
446                         fServer[iSys] = 0;
447                 }
448
449         if (fStatusEntry){
450                 delete fStatusEntry;
451                 fStatusEntry = 0;
452         }
453         
454         if (fMonitoringMutex) 
455         {
456                 delete fMonitoringMutex;
457                 fMonitoringMutex = 0;
458         }
459 }
460
461 //______________________________________________________________________________________________
462 void AliShuttle::RegisterPreprocessor(AliPreprocessor* preprocessor)
463 {
464         //
465         // Registers new AliPreprocessor.
466         // It uses GetName() for indentificator of the pre processor.
467         // The pre processor is registered it there isn't any other
468         // with the same identificator (GetName()).
469         //
470
471         const char* detName = preprocessor->GetName();
472         if(GetDetPos(detName) < 0)
473                 AliFatal(Form("********** !!!!! Invalid detector name: %s !!!!! **********", detName));
474
475         if (fPreprocessorMap.GetValue(detName)) {
476                 AliWarning(Form("AliPreprocessor %s is already registered!", detName));
477                 return;
478         }
479
480         fPreprocessorMap.Add(new TObjString(detName), preprocessor);
481 }
482 //______________________________________________________________________________________________
483 Bool_t AliShuttle::Store(const AliCDBPath& path, TObject* object,
484                 AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite)
485 {
486         // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for
487         // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored
488         // using this function. Use StoreReferenceData instead!
489         // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
490         // finishes the data are transferred to the main storage (Grid).
491
492         return StoreLocally(fgkLocalCDB, path, object, metaData, validityStart, validityInfinite);
493 }
494
495 //______________________________________________________________________________________________
496 Bool_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData)
497 {
498         // Stores a CDB object in the storage for reference data. This objects will not be available during
499         // offline reconstrunction. Use this function for reference data only!
500         // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
501         // finishes the data are transferred to the main storage (Grid).
502
503         return StoreLocally(fgkLocalRefStorage, path, object, metaData);
504 }
505
506 //______________________________________________________________________________________________
507 Bool_t AliShuttle::StoreLocally(const TString& localUri,
508                         const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
509                         Int_t validityStart, Bool_t validityInfinite)
510 {
511         // Store object temporarily in local storage. Parameters are passed by Store and StoreReferenceData functions.
512         // when the preprocessor finishes the data are transferred to the main storage (Grid).
513         // The parameters are:
514         //   1) Uri of the backup storage (Local)
515         //   2) the object's path.
516         //   3) the object to be stored
517         //   4) the metaData to be associated with the object
518         //   5) the validity start run number w.r.t. the current run,
519         //      if the data is valid only for this run leave the default 0
520         //   6) specifies if the calibration data is valid for infinity (this means until updated),
521         //      typical for calibration runs, the default is kFALSE
522         //
523         // returns 0 if fail, 1 otherwise
524
525         if (fTestMode & kErrorStorage)
526         {
527                 Log(fCurrentDetector, "StoreLocally - In TESTMODE - Simulating error while storing locally");
528                 return kFALSE;
529         }
530         
531         const char* cdbType = (localUri == fgkLocalCDB) ? "CDB" : "Reference";
532
533         Int_t firstRun = GetCurrentRun() - validityStart;
534         if(firstRun < 0) {
535                 AliWarning("First valid run happens to be less than 0! Setting it to 0.");
536                 firstRun=0;
537         }
538
539         Int_t lastRun = -1;
540         if(validityInfinite) {
541                 lastRun = AliCDBRunRange::Infinity();
542         } else {
543                 lastRun = GetCurrentRun();
544         }
545
546         // Version is set to current run, it will be used later to transfer data to Grid
547         AliCDBId id(path, firstRun, lastRun, GetCurrentRun(), -1);
548
549         if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){
550                 TObjString runUsed = Form("%d", GetCurrentRun());
551                 metaData->SetProperty("RunUsed(TObjString)", runUsed.Clone());
552         }
553
554         Bool_t result = kFALSE;
555
556         if (!(AliCDBManager::Instance()->GetStorage(localUri))) {
557                 Log("SHUTTLE", Form("StoreLocally - Cannot activate local %s storage", cdbType));
558         } else {
559                 result = AliCDBManager::Instance()->GetStorage(localUri)
560                                         ->Put(object, id, metaData);
561         }
562
563         if(!result) {
564
565                 Log(fCurrentDetector, Form("StoreLocally - Can't store object <%s>!", id.ToString().Data()));
566         }
567
568         return result;
569 }
570
571 //______________________________________________________________________________________________
572 Bool_t AliShuttle::StoreOCDB()
573 {
574         //
575         // Called when preprocessor ends successfully or when previous storage attempt failed (kStoreError status)
576         // Calls underlying StoreOCDB(const char*) function twice, for OCDB and Reference storage.
577         // Then calls StoreRefFilesToGrid to store reference files. 
578         //
579         
580         if (fTestMode & kErrorGrid)
581         {
582                 Log("SHUTTLE", "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
583                 Log(fCurrentDetector, "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
584                 return kFALSE;
585         }
586         
587         Log("SHUTTLE","StoreOCDB - Storing OCDB data ...");
588         Bool_t resultCDB = StoreOCDB(fgkMainCDB);
589
590         Log("SHUTTLE","StoreOCDB - Storing reference data ...");
591         Bool_t resultRef = StoreOCDB(fgkMainRefStorage);
592         
593         Log("SHUTTLE","StoreOCDB - Storing reference files ...");
594         Bool_t resultRefFiles = CopyFilesToGrid("reference");
595         
596         Bool_t resultMetadata = kTRUE;
597         if(fCurrentDetector == "GRP") 
598         {
599                 Log("StoreOCDB - SHUTTLE","Storing Run Metadata file ...");
600                 resultMetadata = CopyFilesToGrid("metadata");
601         }
602         
603         return resultCDB && resultRef && resultRefFiles && resultMetadata;
604 }
605
606 //______________________________________________________________________________________________
607 Bool_t AliShuttle::StoreOCDB(const TString& gridURI)
608 {
609         //
610         // Called by StoreOCDB(), performs actual storage to the main OCDB and reference storages (Grid)
611         //
612
613         TObjArray* gridIds=0;
614
615         Bool_t result = kTRUE;
616
617         const char* type = 0;
618         TString localURI;
619         if(gridURI == fgkMainCDB) {
620                 type = "OCDB";
621                 localURI = fgkLocalCDB;
622         } else if(gridURI == fgkMainRefStorage) {
623                 type = "reference";
624                 localURI = fgkLocalRefStorage;
625         } else {
626                 AliError(Form("Invalid storage URI: %s", gridURI.Data()));
627                 return kFALSE;
628         }
629
630         AliCDBManager* man = AliCDBManager::Instance();
631
632         AliCDBStorage *gridSto = man->GetStorage(gridURI);
633         if(!gridSto) {
634                 Log("SHUTTLE",
635                         Form("StoreOCDB - cannot activate main %s storage", type));
636                 return kFALSE;
637         }
638
639         gridIds = gridSto->GetQueryCDBList();
640
641         // get objects previously stored in local CDB
642         AliCDBStorage *localSto = man->GetStorage(localURI);
643         if(!localSto) {
644                 Log("SHUTTLE",
645                         Form("StoreOCDB - cannot activate local %s storage", type));
646                 return kFALSE;
647         }
648         AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*");
649         // Local objects were stored with current run as Grid version!
650         TList* localEntries = localSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun());
651         localEntries->SetOwner(1);
652
653         // loop on local stored objects
654         TIter localIter(localEntries);
655         AliCDBEntry *aLocEntry = 0;
656         while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){
657                 aLocEntry->SetOwner(1);
658                 AliCDBId aLocId = aLocEntry->GetId();
659                 aLocEntry->SetVersion(-1);
660                 aLocEntry->SetSubVersion(-1);
661
662                 // If local object is valid up to infinity we store it only if it is
663                 // the first unprocessed run!
664                 if (aLocId.GetLastRun() == AliCDBRunRange::Infinity() &&
665                         !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
666                 {
667                         Log("SHUTTLE", Form("StoreOCDB - %s: object %s has validity infinite but "
668                                                 "there are previous unprocessed runs!",
669                                                 fCurrentDetector.Data(), aLocId.GetPath().Data()));
670                         result = kFALSE;
671                         continue;
672                 }
673
674                 // loop on Grid valid Id's
675                 Bool_t store = kTRUE;
676                 TIter gridIter(gridIds);
677                 AliCDBId* aGridId = 0;
678                 while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
679                         if(aGridId->GetPath() != aLocId.GetPath()) continue;
680                         // skip all objects valid up to infinity
681                         if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
682                         // if we get here, it means there's already some more recent object stored on Grid!
683                         store = kFALSE;
684                         break;
685                 }
686
687                 // If we get here, the file can be stored!
688                 Bool_t storeOk = gridSto->Put(aLocEntry);
689                 if(!store || storeOk){
690
691                         if (!store)
692                         {
693                                 Log(fCurrentDetector.Data(),
694                                         Form("StoreOCDB - A more recent object already exists in %s storage: <%s>",
695                                                 type, aGridId->ToString().Data()));
696                         } else {
697                                 Log("SHUTTLE",
698                                         Form("StoreOCDB - Object <%s> successfully put into %s storage",
699                                                 aLocId.ToString().Data(), type));
700                                 Log(fCurrentDetector.Data(),
701                                         Form("StoreOCDB - Object <%s> successfully put into %s storage",
702                                                 aLocId.ToString().Data(), type));
703                         }
704
705                         // removing local filename...
706                         TString filename;
707                         localSto->IdToFilename(aLocId, filename);
708                         Log("SHUTTLE", Form("StoreOCDB - Removing local file %s", filename.Data()));
709                         RemoveFile(filename.Data());
710                         continue;
711                 } else  {
712                         Log("SHUTTLE",
713                                 Form("StoreOCDB - Grid %s storage of object <%s> failed",
714                                         type, aLocId.ToString().Data()));
715                         Log(fCurrentDetector.Data(),
716                                 Form("StoreOCDB - Grid %s storage of object <%s> failed",
717                                         type, aLocId.ToString().Data()));
718                         result = kFALSE;
719                 }
720         }
721         localEntries->Clear();
722
723         return result;
724 }
725
726 //______________________________________________________________________________________________
727 Bool_t AliShuttle::CleanReferenceStorage(const char* detector)
728 {
729         // clears the directory used to store reference files of a given subdetector
730   
731         AliCDBManager* man = AliCDBManager::Instance();
732         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
733         TString localBaseFolder = sto->GetBaseFolder();
734
735         TString targetDir = GetRefFilePrefix(localBaseFolder.Data(), detector);
736         
737         Log("SHUTTLE", Form("CleanReferenceStorage - Cleaning %s", targetDir.Data()));
738
739         TString begin;
740         begin.Form("%d_", GetCurrentRun());
741         
742         TSystemDirectory* baseDir = new TSystemDirectory("/", targetDir);
743         if (!baseDir)
744                 return kTRUE;
745                 
746         TList* dirList = baseDir->GetListOfFiles();
747         delete baseDir;
748         
749         if (!dirList) return kTRUE;
750                         
751         if (dirList->GetEntries() < 3) 
752         {
753                 delete dirList;
754                 return kTRUE;
755         }
756                                 
757         Int_t nDirs = 0, nDel = 0;
758         TIter dirIter(dirList);
759         TSystemFile* entry = 0;
760
761         Bool_t success = kTRUE;
762         
763         while ((entry = dynamic_cast<TSystemFile*> (dirIter.Next())))
764         {                                       
765                 if (entry->IsDirectory())
766                         continue;
767                 
768                 TString fileName(entry->GetName());
769                 if (!fileName.BeginsWith(begin))
770                         continue;
771                         
772                 nDirs++;
773                                                 
774                 // delete file
775                 Int_t result = gSystem->Unlink(fileName.Data());
776                 
777                 if (result)
778                 {
779                         Log("SHUTTLE", Form("CleanReferenceStorage - Could not delete file %s!", fileName.Data()));
780                         success = kFALSE;
781                 } else {
782                         nDel++;
783                 }
784         }
785
786         if(nDirs > 0)
787                 Log("SHUTTLE", Form("CleanReferenceStorage - %d (over %d) reference files in folder %s were deleted.", 
788                         nDel, nDirs, targetDir.Data()));
789
790                 
791         delete dirList;
792         return success;
793
794
795
796
797
798
799   Int_t result = gSystem->GetPathInfo(targetDir, 0, (Long64_t*) 0, 0, 0);
800   if (result == 0)
801   {
802     // delete directory
803     result = gSystem->Exec(Form("rm -rf %s", targetDir.Data()));
804     if (result != 0)
805     {  
806       Log("SHUTTLE", Form("CleanReferenceStorage - Could not clean directory %s", targetDir.Data()));
807       return kFALSE;
808     }
809   }
810
811   result = gSystem->mkdir(targetDir, kTRUE);
812   if (result != 0)
813   {
814     Log("SHUTTLE", Form("CleanReferenceStorage - Error creating base directory %s", targetDir.Data()));
815     return kFALSE;
816   }
817         
818   return kTRUE;
819 }
820
821 //______________________________________________________________________________________________
822 Bool_t AliShuttle::StoreReferenceFile(const char* detector, const char* localFile, const char* gridFileName)
823 {
824         //
825         // Stores reference file directly (without opening it). This function stores the file locally.
826         //
827         // The file is stored under the following location: 
828         // <base folder of local reference storage>/<DET>/<RUN#>_<gridFileName>
829         // where <gridFileName> is the second parameter given to the function
830         // 
831         
832         if (fTestMode & kErrorStorage)
833         {
834                 Log(fCurrentDetector, "StoreReferenceFile - In TESTMODE - Simulating error while storing locally");
835                 return kFALSE;
836         }
837         
838         AliCDBManager* man = AliCDBManager::Instance();
839         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
840         
841         TString localBaseFolder = sto->GetBaseFolder();
842         
843         TString target = GetRefFilePrefix(localBaseFolder.Data(), detector);    
844         target.Append(Form("/%d_%s", GetCurrentRun(), gridFileName));
845         
846         return CopyFileLocally(localFile, target);
847 }
848
849 //______________________________________________________________________________________________
850 Bool_t AliShuttle::StoreRunMetadataFile(const char* localFile, const char* gridFileName)
851 {
852         //
853         // Stores Run metadata file to the Grid, in the run folder
854         //
855         // Only GRP can call this function.
856         
857         if (fTestMode & kErrorStorage)
858         {
859                 Log(fCurrentDetector, "StoreRunMetaDataFile - In TESTMODE - Simulating error while storing locally");
860                 return kFALSE;
861         }
862         
863         AliCDBManager* man = AliCDBManager::Instance();
864         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
865         
866         TString localBaseFolder = sto->GetBaseFolder();
867         
868         // Build Run level folder
869         // folder = /alice/data/year/lhcPeriod/runNb/raw
870         
871                 
872         TString lhcPeriod = GetLHCPeriod();     
873         if (lhcPeriod.Length() == 0) 
874         {
875                 Log("SHUTTLE","StoreRunMetaDataFile - LHCPeriod not found in logbook!");
876                 return 0;
877         }
878                 
879         TString target = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/raw/%s", 
880                                 localBaseFolder.Data(), GetCurrentYear(), 
881                                 lhcPeriod.Data(), GetCurrentRun(), gridFileName);
882                                         
883         return CopyFileLocally(localFile, target);
884 }
885
886 //______________________________________________________________________________________________
887 Bool_t AliShuttle::CopyFileLocally(const char* localFile, const TString& target)
888 {
889         //
890         // Stores file locally. Called by StoreReferenceFile and StoreRunMetadataFile
891         // Files are temporarily stored in the local reference storage. When the preprocessor 
892         // finishes, the Shuttle calls CopyFilesToGrid to transfer the files to AliEn 
893         // (in reference or run level folders)
894         //
895         
896         TString targetDir(target(0, target.Last('/')));
897         
898         //try to open base dir folder, if it does not exist
899         void* dir = gSystem->OpenDirectory(targetDir.Data());
900         if (dir == NULL) {
901                 if (gSystem->mkdir(targetDir.Data(), kTRUE)) {
902                         Log("SHUTTLE", Form("StoreFileLocally - Can't open directory <%s>", targetDir.Data()));
903                         return kFALSE;
904                 }
905
906         } else {
907                 gSystem->FreeDirectory(dir);
908         }
909         
910         Int_t result = 0;
911         
912         result = gSystem->GetPathInfo(localFile, 0, (Long64_t*) 0, 0, 0);
913         if (result)
914         {
915                 Log("SHUTTLE", Form("StoreFileLocally - %s does not exist", localFile));
916                 return kFALSE;
917         }
918
919         result = gSystem->GetPathInfo(target, 0, (Long64_t*) 0, 0, 0);
920         if (!result)
921         {
922                 Log("SHUTTLE", Form("StoreFileLocally - target file %s already exist, removing...", target.Data()));
923                 if (gSystem->Unlink(target.Data()))
924                 {
925                         Log("SHUTTLE", Form("StoreFileLocally - Could not remove existing target file %s!", target.Data()));
926                         return kFALSE;
927                 }
928         }       
929         
930         result = gSystem->CopyFile(localFile, target);
931
932         if (result == 0)
933         {
934                 Log("SHUTTLE", Form("StoreFileLocally - File %s stored locally to %s", localFile, target.Data()));
935                 return kTRUE;
936         }
937         else
938         {
939                 Log("SHUTTLE", Form("StoreFileLocally - Could not store file %s to %s! Error code = %d", 
940                                 localFile, target.Data(), result));
941                 return kFALSE;
942         }       
943
944
945
946 }
947
948 //______________________________________________________________________________________________
949 Bool_t AliShuttle::CopyFilesToGrid(const char* type)
950 {
951         //
952         // Transfers local files to the Grid. Local files can be reference files 
953         // or run metadata file (from GRP only).
954         //
955         // According to the type (ref, metadata) the files are stored under the following location: 
956         // ref --> <base folder of reference storage>/<DET>/<RUN#>_<gridFileName>
957         // metadata --> <run data folder>/<MetadataFileName>
958         //
959                 
960         AliCDBManager* man = AliCDBManager::Instance();
961         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
962         if (!sto)
963                 return kFALSE;
964         TString localBaseFolder = sto->GetBaseFolder();
965         
966         TString dir;
967         TString alienDir;
968         TString begin;
969         
970         if (strcmp(type, "reference") == 0) 
971         {
972                 dir = GetRefFilePrefix(localBaseFolder.Data(), fCurrentDetector.Data());
973                 AliCDBStorage* gridSto = man->GetStorage(fgkMainRefStorage);
974                 if (!gridSto)
975                         return kFALSE;
976                 TString gridBaseFolder = gridSto->GetBaseFolder();
977                 alienDir = GetRefFilePrefix(gridBaseFolder.Data(), fCurrentDetector.Data());
978                 begin = Form("%d_", GetCurrentRun());
979         } 
980         else if (strcmp(type, "metadata") == 0)
981         {
982                         
983                 TString lhcPeriod = GetLHCPeriod();
984         
985                 if (lhcPeriod.Length() == 0) 
986                 {
987                         Log("SHUTTLE","CopyFilesToGrid - LHCPeriod not found in logbook!");
988                         return 0;
989                 }
990                 
991                 dir = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/raw", 
992                                 localBaseFolder.Data(), GetCurrentYear(), 
993                                 lhcPeriod.Data(), GetCurrentRun());
994                 alienDir = dir(dir.Index("/alice/data/"), dir.Length());
995                 
996                 begin = "";
997         }
998         else 
999         {
1000                 Log("SHUTTLE", "CopyFilesToGrid - Unexpected: type label must be reference or metadata!");
1001                 return kFALSE;
1002         }
1003                 
1004         TSystemDirectory* baseDir = new TSystemDirectory("/", dir);
1005         if (!baseDir)
1006                 return kTRUE;
1007                 
1008         TList* dirList = baseDir->GetListOfFiles();
1009         delete baseDir;
1010         
1011         if (!dirList) return kTRUE;
1012                 
1013         if (dirList->GetEntries() < 3) 
1014         {
1015                 delete dirList;
1016                 return kTRUE;
1017         }
1018                         
1019         if (!gGrid)
1020         { 
1021                 Log("SHUTTLE", "CopyFilesToGrid - Connection to Grid failed: Cannot continue!");
1022                 delete dirList;
1023                 return kFALSE;
1024         }
1025         
1026         Int_t nDirs = 0, nTransfer = 0;
1027         TIter dirIter(dirList);
1028         TSystemFile* entry = 0;
1029
1030         Bool_t success = kTRUE;
1031         Bool_t first = kTRUE;
1032         
1033         while ((entry = dynamic_cast<TSystemFile*> (dirIter.Next())))
1034         {                       
1035                 if (entry->IsDirectory())
1036                         continue;
1037                         
1038                 TString fileName(entry->GetName());
1039                 if (!fileName.BeginsWith(begin))
1040                         continue;
1041                         
1042                 nDirs++;
1043                         
1044                 if (first)
1045                 {
1046                         first = kFALSE;
1047                         // check that folder exists, otherwise create it
1048                         TGridResult* result = gGrid->Ls(alienDir.Data(), "a");
1049                         
1050                         if (!result)
1051                         {
1052                                 delete dirList;
1053                                 return kFALSE;
1054                         }
1055                         
1056                         if (!result->GetFileName(1)) // TODO: It looks like element 0 is always 0!!
1057                         {
1058                                 // TODO It does not work currently! Bug in TAliEn::Mkdir
1059                                 // TODO Manually fixed in local root v5-16-00
1060                                 if (!gGrid->Mkdir(alienDir.Data(),"-p",0))
1061                                 {
1062                                         Log("SHUTTLE", Form("CopyFilesToGrid - Cannot create directory %s",
1063                                                         alienDir.Data()));
1064                                         delete dirList;
1065                                         return kFALSE;
1066                                 } else {
1067                                         Log("SHUTTLE",Form("CopyFilesToGrid - Folder %s created", alienDir.Data()));
1068                                 }
1069                                 
1070                         } else {
1071                                         Log("SHUTTLE",Form("CopyFilesToGrid - Folder %s found", alienDir.Data()));
1072                         }
1073                 }
1074                         
1075                 TString fullLocalPath;
1076                 fullLocalPath.Form("%s/%s", dir.Data(), fileName.Data());
1077                 
1078                 TString fullGridPath;
1079                 fullGridPath.Form("alien://%s/%s", alienDir.Data(), fileName.Data());
1080
1081                 Bool_t result = TFile::Cp(fullLocalPath, fullGridPath);
1082                 
1083                 if (result)
1084                 {
1085                         Log("SHUTTLE", Form("CopyFilesToGrid - Copying local file %s to %s succeeded!", 
1086                                                 fullLocalPath.Data(), fullGridPath.Data()));
1087                         RemoveFile(fullLocalPath);
1088                         nTransfer++;
1089                 }
1090                 else
1091                 {
1092                         Log("SHUTTLE", Form("CopyFilesToGrid - Copying local file %s to %s FAILED!", 
1093                                                 fullLocalPath.Data(), fullGridPath.Data()));
1094                         success = kFALSE;
1095                 }
1096         }
1097
1098         Log("SHUTTLE", Form("CopyFilesToGrid - %d (over %d) files in folder %s copied to Grid.", 
1099                                                 nTransfer, nDirs, dir.Data()));
1100
1101                 
1102         delete dirList;
1103         return success;
1104 }
1105
1106 //______________________________________________________________________________________________
1107 const char* AliShuttle::GetRefFilePrefix(const char* base, const char* detector)
1108 {
1109         //
1110         // Get folder name of reference files 
1111         //
1112
1113         TString offDetStr(GetOfflineDetName(detector));
1114         TString dir;
1115         if (offDetStr == "ITS" || offDetStr == "MUON" || offDetStr == "PHOS")
1116         {
1117                 dir.Form("%s/%s/%s", base, offDetStr.Data(), detector);
1118         } else {
1119                 dir.Form("%s/%s", base, offDetStr.Data());
1120         }
1121         
1122         return dir.Data();
1123         
1124
1125 }
1126
1127 //______________________________________________________________________________________________
1128 void AliShuttle::CleanLocalStorage(const TString& uri)
1129 {
1130         //
1131         // Called in case the preprocessor is declared failed. Remove remaining objects from the local storages.
1132         //
1133
1134         const char* type = 0;
1135         if(uri == fgkLocalCDB) {
1136                 type = "OCDB";
1137         } else if(uri == fgkLocalRefStorage) {
1138                 type = "Reference";
1139         } else {
1140                 AliError(Form("Invalid storage URI: %s", uri.Data()));
1141                 return;
1142         }
1143
1144         AliCDBManager* man = AliCDBManager::Instance();
1145
1146         // open local storage
1147         AliCDBStorage *localSto = man->GetStorage(uri);
1148         if(!localSto) {
1149                 Log("SHUTTLE",
1150                         Form("CleanLocalStorage - cannot activate local %s storage", type));
1151                 return;
1152         }
1153
1154         TString filename(Form("%s/%s/*/Run*_v%d_s*.root",
1155                 localSto->GetBaseFolder().Data(), GetOfflineDetName(fCurrentDetector.Data()), GetCurrentRun()));
1156
1157         AliDebug(2, Form("filename = %s", filename.Data()));
1158
1159         Log("SHUTTLE", Form("Removing remaining local files for run %d and detector %s ...",
1160                 GetCurrentRun(), fCurrentDetector.Data()));
1161
1162         RemoveFile(filename.Data());
1163
1164 }
1165
1166 //______________________________________________________________________________________________
1167 void AliShuttle::RemoveFile(const char* filename)
1168 {
1169         //
1170         // removes local file
1171         //
1172
1173         TString command(Form("rm -f %s", filename));
1174
1175         Int_t result = gSystem->Exec(command.Data());
1176         if(result != 0)
1177         {
1178                 Log("SHUTTLE", Form("RemoveFile - %s: Cannot remove file %s!",
1179                         fCurrentDetector.Data(), filename));
1180         }
1181 }
1182
1183 //______________________________________________________________________________________________
1184 AliShuttleStatus* AliShuttle::ReadShuttleStatus()
1185 {
1186         //
1187         // Reads the AliShuttleStatus from the CDB
1188         //
1189
1190         if (fStatusEntry){
1191                 delete fStatusEntry;
1192                 fStatusEntry = 0;
1193         }
1194
1195         fStatusEntry = AliCDBManager::Instance()->GetStorage(GetLocalCDB())
1196                 ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
1197
1198         if (!fStatusEntry) return 0;
1199         fStatusEntry->SetOwner(1);
1200
1201         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1202         if (!status) {
1203                 AliError("Invalid object stored to CDB!");
1204                 return 0;
1205         }
1206
1207         return status;
1208 }
1209
1210 //______________________________________________________________________________________________
1211 Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status)
1212 {
1213         //
1214         // writes the status for one subdetector
1215         //
1216
1217         if (fStatusEntry){
1218                 delete fStatusEntry;
1219                 fStatusEntry = 0;
1220         }
1221
1222         Int_t run = GetCurrentRun();
1223
1224         AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
1225
1226         fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
1227         fStatusEntry->SetOwner(1);
1228
1229         UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
1230
1231         if (!result) {
1232                 Log("SHUTTLE", Form("WriteShuttleStatus - Failed for %s, run %d",
1233                                                 fCurrentDetector.Data(), run));
1234                 return kFALSE;
1235         }
1236         
1237         SendMLInfo();
1238
1239         return kTRUE;
1240 }
1241
1242 //______________________________________________________________________________________________
1243 void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount)
1244 {
1245         //
1246         // changes the AliShuttleStatus for the given detector and run to the given status
1247         //
1248
1249         if (!fStatusEntry){
1250                 AliError("UNEXPECTED: fStatusEntry empty");
1251                 return;
1252         }
1253
1254         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1255
1256         if (!status){
1257                 Log("SHUTTLE", "UpdateShuttleStatus - UNEXPECTED: status could not be read from current CDB entry");
1258                 return;
1259         }
1260
1261         TString actionStr = Form("UpdateShuttleStatus - %s: Changing state from %s to %s",
1262                                 fCurrentDetector.Data(),
1263                                 status->GetStatusName(),
1264                                 status->GetStatusName(newStatus));
1265         Log("SHUTTLE", actionStr);
1266         SetLastAction(actionStr);
1267
1268         status->SetStatus(newStatus);
1269         if (increaseCount) status->IncreaseCount();
1270
1271         AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
1272
1273         SendMLInfo();
1274 }
1275
1276 //______________________________________________________________________________________________
1277 void AliShuttle::SendMLInfo()
1278 {
1279         //
1280         // sends ML information about the current status of the current detector being processed
1281         //
1282         
1283         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1284         
1285         if (!status){
1286                 Log("SHUTTLE", "SendMLInfo - UNEXPECTED: status could not be read from current CDB entry");
1287                 return;
1288         }
1289         
1290         TMonaLisaText  mlStatus(Form("%s_status", fCurrentDetector.Data()), status->GetStatusName());
1291         TMonaLisaValue mlRetryCount(Form("%s_count", fCurrentDetector.Data()), status->GetCount());
1292
1293         TList mlList;
1294         mlList.Add(&mlStatus);
1295         mlList.Add(&mlRetryCount);
1296
1297         TString mlID;
1298         mlID.Form("%d", GetCurrentRun());
1299         fMonaLisa->SendParameters(&mlList, mlID);
1300 }
1301
1302 //______________________________________________________________________________________________
1303 Bool_t AliShuttle::ContinueProcessing()
1304 {
1305         // this function reads the AliShuttleStatus information from CDB and
1306         // checks if the processing should be continued
1307         // if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
1308
1309         if (!fConfig->HostProcessDetector(fCurrentDetector)) return kFALSE;
1310
1311         AliPreprocessor* aPreprocessor =
1312                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
1313         if (!aPreprocessor)
1314         {
1315                 Log("SHUTTLE", Form("ContinueProcessing - %s: no preprocessor registered", fCurrentDetector.Data()));
1316                 return kFALSE;
1317         }
1318
1319         AliShuttleLogbookEntry::Status entryStatus =
1320                 fLogbookEntry->GetDetectorStatus(fCurrentDetector);
1321
1322         if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
1323                 Log("SHUTTLE", Form("ContinueProcessing - %s is %s",
1324                                 fCurrentDetector.Data(),
1325                                 fLogbookEntry->GetDetectorStatusName(entryStatus)));
1326                 return kFALSE;
1327         }
1328
1329         // if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state
1330
1331         // check if current run is first unprocessed run for current detector
1332         if (fConfig->StrictRunOrder(fCurrentDetector) &&
1333                 !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
1334         {
1335                 if (fTestMode == kNone)
1336                 {
1337                         Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering"
1338                                         " but this is not the first unprocessed run!"));
1339                         return kFALSE;
1340                 }
1341                 else
1342                 {
1343                         Log("SHUTTLE", Form("ContinueProcessing - In TESTMODE - "
1344                                         "Although %s requires strict run ordering "
1345                                         "and this is not the first unprocessed run, "
1346                                         "the SHUTTLE continues"));
1347                 }
1348         }
1349
1350         AliShuttleStatus* status = ReadShuttleStatus();
1351         if (!status) {
1352                 // first time
1353                 Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
1354                                 fCurrentDetector.Data()));
1355                 status = new AliShuttleStatus(AliShuttleStatus::kStarted);
1356                 return WriteShuttleStatus(status);
1357         }
1358
1359         // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
1360         // If it happens it may mean Logbook updating failed... let's do it now!
1361         if (status->GetStatus() == AliShuttleStatus::kDone ||
1362             status->GetStatus() == AliShuttleStatus::kFailed){
1363                 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
1364                                         fCurrentDetector.Data(),
1365                                         status->GetStatusName(status->GetStatus())));
1366                 UpdateShuttleLogbook(fCurrentDetector.Data(),
1367                                         status->GetStatusName(status->GetStatus()));
1368                 return kFALSE;
1369         }
1370
1371         if (status->GetStatus() == AliShuttleStatus::kStoreError) {
1372                 Log("SHUTTLE",
1373                         Form("ContinueProcessing - %s: Grid storage of one or more "
1374                                 "objects failed. Trying again now",
1375                                 fCurrentDetector.Data()));
1376                 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
1377                 if (StoreOCDB()){
1378                         Log("SHUTTLE", Form("ContinueProcessing - %s: all objects "
1379                                 "successfully stored into main storage",
1380                                 fCurrentDetector.Data()));
1381                 } else {
1382                         Log("SHUTTLE",
1383                                 Form("ContinueProcessing - %s: Grid storage failed again",
1384                                         fCurrentDetector.Data()));
1385                         UpdateShuttleStatus(AliShuttleStatus::kStoreError);
1386                 }
1387                 return kFALSE;
1388         }
1389
1390         // if we get here, there is a restart
1391         Bool_t cont = kFALSE;
1392
1393         // abort conditions
1394         if (status->GetCount() >= fConfig->GetMaxRetries()) {
1395                 Log("SHUTTLE", Form("ContinueProcessing - %s failed %d times in status %s - "
1396                                 "Updating Shuttle Logbook", fCurrentDetector.Data(),
1397                                 status->GetCount(), status->GetStatusName()));
1398                 UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
1399                 UpdateShuttleStatus(AliShuttleStatus::kFailed);
1400
1401                 // there may still be objects in local OCDB and reference storage
1402                 // and FXS databases may be not updated: do it now!
1403                 
1404                 // TODO Currently disabled, we want to keep files in case of failure!
1405                 // CleanLocalStorage(fgkLocalCDB);
1406                 // CleanLocalStorage(fgkLocalRefStorage);
1407                 // UpdateTableFailCase();
1408                 
1409                 // Send mail to detector expert!
1410                 Log("SHUTTLE", Form("ContinueProcessing - Sending mail to %s expert...", 
1411                                         fCurrentDetector.Data()));
1412                 if (!SendMail())
1413                         Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert",
1414                                         fCurrentDetector.Data()));
1415
1416         } else {
1417                 Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. "
1418                                 "Aborted before with %s. Retry number %d.", fCurrentDetector.Data(),
1419                                 status->GetStatusName(), status->GetCount()));
1420                 Bool_t increaseCount = kTRUE;
1421                 if (status->GetStatus() == AliShuttleStatus::kDCSError || 
1422                         status->GetStatus() == AliShuttleStatus::kDCSStarted)
1423                                 increaseCount = kFALSE;
1424                                 
1425                 UpdateShuttleStatus(AliShuttleStatus::kStarted, increaseCount);
1426                 cont = kTRUE;
1427         }
1428
1429         return cont;
1430 }
1431
1432 //______________________________________________________________________________________________
1433 Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
1434 {
1435         //
1436         // Makes data retrieval for all detectors in the configuration.
1437         // entry: Shuttle logbook entry, contains run paramenters and status of detectors
1438         // (Unprocessed, Inactive, Failed or Done).
1439         // Returns kFALSE in case of error occured and kTRUE otherwise
1440         //
1441
1442         if (!entry) return kFALSE;
1443
1444         fLogbookEntry = entry;
1445
1446         Log("SHUTTLE", Form("\t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^*",
1447                                         GetCurrentRun()));
1448
1449         // Send the information to ML
1450         TMonaLisaText  mlStatus("SHUTTLE_status", "Processing");
1451         TMonaLisaText  mlRunType("SHUTTLE_runtype", Form("%s (%s)", entry->GetRunType(), entry->GetRunParameter("log")));
1452
1453         TList mlList;
1454         mlList.Add(&mlStatus);
1455         mlList.Add(&mlRunType);
1456
1457         TString mlID;
1458         mlID.Form("%d", GetCurrentRun());
1459         fMonaLisa->SendParameters(&mlList, mlID);
1460
1461         if (fLogbookEntry->IsDone())
1462         {
1463                 Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook");
1464                 UpdateShuttleLogbook("shuttle_done");
1465                 fLogbookEntry = 0;
1466                 return kTRUE;
1467         }
1468
1469         // read test mode if flag is set
1470         if (fReadTestMode)
1471         {
1472                 fTestMode = kNone;
1473                 TString logEntry(entry->GetRunParameter("log"));
1474                 //printf("log entry = %s\n", logEntry.Data());
1475                 TString searchStr("Testmode: ");
1476                 Int_t pos = logEntry.Index(searchStr.Data());
1477                 //printf("%d\n", pos);
1478                 if (pos >= 0)
1479                 {
1480                         TSubString subStr = logEntry(pos + searchStr.Length(), logEntry.Length());
1481                         //printf("%s\n", subStr.String().Data());
1482                         TString newStr(subStr.Data());
1483                         TObjArray* token = newStr.Tokenize(' ');
1484                         if (token)
1485                         {
1486                                 //token->Print();
1487                                 TObjString* tmpStr = dynamic_cast<TObjString*> (token->First());
1488                                 if (tmpStr)
1489                                 {
1490                                         Int_t testMode = tmpStr->String().Atoi();
1491                                         if (testMode > 0)
1492                                         {
1493                                                 Log("SHUTTLE", Form("Process - Enabling test mode %d", testMode));
1494                                                 SetTestMode((TestMode) testMode);
1495                                         }
1496                                 }
1497                                 delete token;          
1498                         }
1499                 }
1500         }
1501                 
1502         fLogbookEntry->Print("all");
1503
1504         // Initialization
1505         Bool_t hasError = kFALSE;
1506
1507         // Set the CDB and Reference folders according to the year and LHC period
1508         TString lhcPeriod(GetLHCPeriod());
1509         if (lhcPeriod.Length() == 0) 
1510         {
1511                 Log("SHUTTLE","Process - LHCPeriod not found in logbook!");
1512                 return 0; 
1513         }       
1514         
1515         if (fgkMainCDB.Length() == 0)
1516                 fgkMainCDB = Form("alien://folder=/alice/data/%d/%s/OCDB?user=alidaq?cacheFold=/tmp/OCDBCache", 
1517                                         GetCurrentYear(), lhcPeriod.Data());
1518         
1519         if (fgkMainRefStorage.Length() == 0)
1520                 fgkMainRefStorage = Form("alien://folder=/alice/data/%d/%s/Reference?user=alidaq?cacheFold=/tmp/OCDBCache", 
1521                                         GetCurrentYear(), lhcPeriod.Data());
1522         
1523         // Loop on detectors in the configuration
1524         TIter iter(fConfig->GetDetectors());
1525         TObjString* aDetector = 0;
1526
1527         Bool_t first = kTRUE;
1528
1529         while ((aDetector = (TObjString*) iter.Next()))
1530         {
1531                 fCurrentDetector = aDetector->String();
1532
1533                 if (ContinueProcessing() == kFALSE) continue;
1534                 
1535                 if (first)
1536                 {
1537                   // only read QueryCDB when needed and only once
1538                   AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
1539                   if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
1540                   AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
1541                   if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
1542                   first = kFALSE;
1543                 }
1544
1545                 Log("SHUTTLE", Form("\t\t\t****** run %d - %s: START  ******",
1546                                                 GetCurrentRun(), aDetector->GetName()));
1547
1548                 for(Int_t iSys=0;iSys<3;iSys++) fFXSCalled[iSys]=kFALSE;
1549
1550                 Log(fCurrentDetector.Data(), "Process - Starting processing");
1551
1552                 Int_t pid = fork();
1553
1554                 if (pid < 0)
1555                 {
1556                         Log("SHUTTLE", "Process - ERROR: Forking failed");
1557                 }
1558                 else if (pid > 0)
1559                 {
1560                         // parent
1561                         Log("SHUTTLE", Form("Process - In parent process of %d - %s: Starting monitoring",
1562                                                         GetCurrentRun(), aDetector->GetName()));
1563
1564                         Long_t begin = time(0);
1565
1566                         int status; // to be used with waitpid, on purpose an int (not Int_t)!
1567                         while (waitpid(pid, &status, WNOHANG) == 0)
1568                         {
1569                                 Long_t expiredTime = time(0) - begin;
1570
1571                                 if (expiredTime > fConfig->GetPPTimeOut())
1572                                 {
1573                                         TString tmp;
1574                                         tmp.Form("Process - Process of %s time out. "
1575                                                         "Run time: %d seconds. Killing...",
1576                                                         fCurrentDetector.Data(), expiredTime);
1577                                         Log("SHUTTLE", tmp);
1578                                         Log(fCurrentDetector, tmp);
1579
1580                                         kill(pid, 9);
1581
1582                                         UpdateShuttleStatus(AliShuttleStatus::kPPTimeOut);
1583                                         hasError = kTRUE;
1584
1585                                         gSystem->Sleep(1000);
1586                                 }
1587                                 else
1588                                 {
1589                                         gSystem->Sleep(1000);
1590                                         
1591                                         TString checkStr;
1592                                         checkStr.Form("ps -o vsize --pid %d | tail -n 1", pid);
1593                                         FILE* pipe = gSystem->OpenPipe(checkStr, "r");
1594                                         if (!pipe)
1595                                         {
1596                                                 Log("SHUTTLE", Form("Process - Error: "
1597                                                         "Could not open pipe to %s", checkStr.Data()));
1598                                                 continue;
1599                                         }
1600                                                 
1601                                         char buffer[100];
1602                                         if (!fgets(buffer, 100, pipe))
1603                                         {
1604                                                 Log("SHUTTLE", "Process - Error: ps did not return anything");
1605                                                 gSystem->ClosePipe(pipe);
1606                                                 continue;
1607                                         }
1608                                         gSystem->ClosePipe(pipe);
1609                                         
1610                                         //Log("SHUTTLE", Form("ps returned %s", buffer));
1611                                         
1612                                         Int_t mem = 0;
1613                                         if ((sscanf(buffer, "%d\n", &mem) != 1) || !mem)
1614                                         {
1615                                                 Log("SHUTTLE", "Process - Error: Could not parse output of ps");
1616                                                 continue;
1617                                         }
1618                                         
1619                                         if (expiredTime % 60 == 0)
1620                                         {
1621                                                 Log("SHUTTLE", Form("Process - %s: Checking process. "
1622                                                         "Run time: %d seconds - Memory consumption: %d KB",
1623                                                         fCurrentDetector.Data(), expiredTime, mem));
1624                                                 SendAlive();
1625                                         }
1626                                         
1627                                         if (mem > fConfig->GetPPMaxMem())
1628                                         {
1629                                                 TString tmp;
1630                                                 tmp.Form("Process - Process exceeds maximum allowed memory "
1631                                                         "(%d KB > %d KB). Killing...",
1632                                                         mem, fConfig->GetPPMaxMem());
1633                                                 Log("SHUTTLE", tmp);
1634                                                 Log(fCurrentDetector, tmp);
1635         
1636                                                 kill(pid, 9);
1637         
1638                                                 UpdateShuttleStatus(AliShuttleStatus::kPPOutOfMemory);
1639                                                 hasError = kTRUE;
1640         
1641                                                 gSystem->Sleep(1000);
1642                                         }
1643                                 }
1644                         }
1645
1646                         Log("SHUTTLE", Form("Process - In parent process of %d - %s: Client has terminated.",
1647                                                                 GetCurrentRun(), aDetector->GetName()));
1648
1649                         if (WIFEXITED(status))
1650                         {
1651                                 Int_t returnCode = WEXITSTATUS(status);
1652
1653                                 Log("SHUTTLE", Form("Process - %s: the return code is %d", fCurrentDetector.Data(),
1654                                                                                 returnCode));
1655
1656                                 if (returnCode == 0) hasError = kTRUE;
1657                         }
1658                 }
1659                 else if (pid == 0)
1660                 {
1661                         // client
1662                         Log("SHUTTLE", Form("Process - In client process of %d - %s", GetCurrentRun(),
1663                                 aDetector->GetName()));
1664
1665                         Log("SHUTTLE", Form("Process - Redirecting output to %s log",fCurrentDetector.Data()));
1666
1667                         if ((freopen(GetLogFileName(fCurrentDetector), "a", stdout)) == 0)
1668                         {
1669                                 Log("SHUTTLE", "Process - Could not freopen stdout");
1670                         }
1671                         else
1672                         {
1673                                 fOutputRedirected = kTRUE;
1674                                 if ((dup2(fileno(stdout), fileno(stderr))) < 0)
1675                                         Log("SHUTTLE", "Process - Could not redirect stderr");
1676                                 
1677                         }
1678                         
1679                         TString wd = gSystem->WorkingDirectory();
1680                         TString tmpDir = Form("%s/%s_%d_process", GetShuttleTempDir(), 
1681                                 fCurrentDetector.Data(), GetCurrentRun());
1682                         
1683                         Int_t result = gSystem->GetPathInfo(tmpDir.Data(), 0, (Long64_t*) 0, 0, 0);
1684                         if (!result) // temp dir already exists!
1685                         {
1686                                 Log(fCurrentDetector.Data(), 
1687                                         Form("Process - %s dir already exists! Removing...", tmpDir.Data()));
1688                                 gSystem->Exec(Form("rm -rf %s",tmpDir.Data()));         
1689                         } 
1690                         
1691                         if (gSystem->mkdir(tmpDir.Data(), 1))
1692                         {
1693                                 Log(fCurrentDetector.Data(), "Process - could not make temp directory!!");
1694                                 gSystem->Exit(1);
1695                         }
1696                         
1697                         if (!gSystem->ChangeDirectory(tmpDir.Data())) 
1698                         {
1699                                 Log(fCurrentDetector.Data(), "Process - could not change directory!!");
1700                                 gSystem->Exit(1);                       
1701                         }
1702                         
1703                         Bool_t success = ProcessCurrentDetector();
1704                         
1705                         gSystem->ChangeDirectory(wd.Data());
1706                                                 
1707                         if (success) // Preprocessor finished successfully!
1708                         { 
1709                                 // remove temporary folder
1710                                 // temporary commented (JF)
1711                                 //gSystem->Exec(Form("rm -rf %s",tmpDir.Data()));
1712                                 
1713                                 // Update time_processed field in FXS DB
1714                                 if (UpdateTable() == kFALSE)
1715                                         Log("SHUTTLE", Form("Process - %s: Could not update FXS databases!", 
1716                                                         fCurrentDetector.Data()));
1717
1718                                 // Transfer the data from local storage to main storage (Grid)
1719                                 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
1720                                 if (StoreOCDB() == kFALSE)
1721                                 {
1722                                         Log("SHUTTLE", 
1723                                                 Form("\t\t\t****** run %d - %s: STORAGE ERROR ******",
1724                                                         GetCurrentRun(), aDetector->GetName()));
1725                                         UpdateShuttleStatus(AliShuttleStatus::kStoreError);
1726                                         success = kFALSE;
1727                                 } else {
1728                                         Log("SHUTTLE", 
1729                                                 Form("\t\t\t****** run %d - %s: DONE ******",
1730                                                         GetCurrentRun(), aDetector->GetName()));
1731                                         UpdateShuttleStatus(AliShuttleStatus::kDone);
1732                                         UpdateShuttleLogbook(fCurrentDetector, "DONE");
1733                                 }
1734                         } else 
1735                         {
1736                                 Log("SHUTTLE", 
1737                                         Form("\t\t\t****** run %d - %s: PP ERROR ******",
1738                                                 GetCurrentRun(), aDetector->GetName()));
1739                         }
1740
1741                         for (UInt_t iSys=0; iSys<3; iSys++)
1742                         {
1743                                 if (fFXSCalled[iSys]) fFXSlist[iSys].Clear();
1744                         }
1745
1746                         Log("SHUTTLE", Form("Process - Client process of %d - %s is exiting now with %d.",
1747                                                         GetCurrentRun(), aDetector->GetName(), success));
1748
1749                         // the client exits here
1750                         gSystem->Exit(success);
1751
1752                         AliError("We should never get here!!!");
1753                 }
1754         }
1755
1756         Log("SHUTTLE", Form("\t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^*",
1757                                                         GetCurrentRun()));
1758
1759         //check if shuttle is done for this run, if so update logbook
1760         TObjArray checkEntryArray;
1761         checkEntryArray.SetOwner(1);
1762         TString whereClause = Form("where run=%d", GetCurrentRun());
1763         if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || 
1764                         checkEntryArray.GetEntries() == 0) {
1765                 Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!",
1766                                                 GetCurrentRun()));
1767                 return hasError == kFALSE;
1768         }
1769
1770         AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*>
1771                                                 (checkEntryArray.At(0));
1772
1773         if (checkEntry)
1774         {
1775                 if (checkEntry->IsDone())
1776                 {
1777                         Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook");
1778                         UpdateShuttleLogbook("shuttle_done");
1779                 }
1780                 else
1781                 {
1782                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1783                         {
1784                                 if (checkEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
1785                                 {
1786                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
1787                                                         checkEntry->GetRun(), GetDetName(iDet)));
1788                                         fFirstUnprocessed[iDet] = kFALSE;
1789                                 }
1790                         }
1791                 }
1792         }
1793
1794         fLogbookEntry = 0;
1795
1796         return hasError == kFALSE;
1797 }
1798
1799 //______________________________________________________________________________________________
1800 Bool_t AliShuttle::ProcessCurrentDetector()
1801 {
1802         //
1803         // Makes data retrieval just for a specific detector (fCurrentDetector).
1804         // Threre should be a configuration for this detector.
1805
1806         Log("SHUTTLE", Form("ProcessCurrentDetector - Retrieving values for %s, run %d", 
1807                                                 fCurrentDetector.Data(), GetCurrentRun()));
1808
1809         TString wd = gSystem->WorkingDirectory();
1810         
1811         if (!CleanReferenceStorage(fCurrentDetector.Data()))
1812                 return kFALSE;
1813         
1814         gSystem->ChangeDirectory(wd.Data());
1815         
1816         TMap* dcsMap = new TMap();
1817
1818         // call preprocessor
1819         AliPreprocessor* aPreprocessor =
1820                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
1821
1822         aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
1823
1824         Bool_t processDCS = aPreprocessor->ProcessDCS();
1825
1826         if (!processDCS)
1827         {
1828                 Log(fCurrentDetector, "ProcessCurrentDetector -"
1829                         " The preprocessor requested to skip the retrieval of DCS values");
1830         }
1831         else if (fTestMode & kSkipDCS)
1832         {
1833                 Log(fCurrentDetector, "ProcessCurrentDetector - In TESTMODE: Skipping DCS processing");
1834         } 
1835         else if (fTestMode & kErrorDCS)
1836         {
1837                 Log(fCurrentDetector, "ProcessCurrentDetector - In TESTMODE: Simulating DCS error");
1838                 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
1839                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1840                 delete dcsMap;
1841                 return kFALSE;
1842         } else {
1843
1844                 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
1845
1846                 // Query DCS archive
1847                 Int_t nServers = fConfig->GetNServers(fCurrentDetector);
1848                 
1849                 for (int iServ=0; iServ<nServers; iServ++)
1850                 {
1851                 
1852                         TString host(fConfig->GetDCSHost(fCurrentDetector, iServ));
1853                         Int_t port = fConfig->GetDCSPort(fCurrentDetector, iServ);
1854                         Int_t multiSplit = fConfig->GetMultiSplit(fCurrentDetector, iServ);
1855
1856                         Log(fCurrentDetector, Form("ProcessCurrentDetector -"
1857                                         " Querying DCS Amanda server %s:%d (%d of %d)", 
1858                                         host.Data(), port, iServ+1, nServers));
1859                         
1860                         TMap* aliasMap = 0;
1861                         TMap* dpMap = 0;
1862         
1863                         if (fConfig->GetDCSAliases(fCurrentDetector, iServ)->GetEntries() > 0)
1864                         {
1865                                 aliasMap = GetValueSet(host, port, 
1866                                                 fConfig->GetDCSAliases(fCurrentDetector, iServ), 
1867                                                 kAlias, multiSplit);
1868                                 if (!aliasMap)
1869                                 {
1870                                         Log(fCurrentDetector, 
1871                                                 Form("ProcessCurrentDetector -"
1872                                                         " Error retrieving DCS aliases from server %s."
1873                                                         " Sending mail to DCS experts!", host.Data()));
1874                                         UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1875                                         
1876                                         //if (!SendMailToDCS())
1877                                         //      Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!"));
1878
1879                                         delete dcsMap;
1880                                         return kFALSE;
1881                                 }
1882                         }
1883                         
1884                         if (fConfig->GetDCSDataPoints(fCurrentDetector, iServ)->GetEntries() > 0)
1885                         {
1886                                 dpMap = GetValueSet(host, port, 
1887                                                 fConfig->GetDCSDataPoints(fCurrentDetector, iServ), 
1888                                                 kDP, multiSplit);
1889                                 if (!dpMap)
1890                                 {
1891                                         Log(fCurrentDetector, 
1892                                                 Form("ProcessCurrentDetector -"
1893                                                         " Error retrieving DCS data points from server %s."
1894                                                         " Sending mail to DCS experts!", host.Data()));
1895                                         UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1896                                         
1897                                         //if (!SendMailToDCS())
1898                                         //      Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!"));
1899                                         
1900                                         if (aliasMap) delete aliasMap;
1901                                         delete dcsMap;
1902                                         return kFALSE;
1903                                 }                               
1904                         }
1905                         
1906                         // merge aliasMap and dpMap into dcsMap
1907                         if(aliasMap) {
1908                                 TIter iter(aliasMap);
1909                                 TObjString* key = 0;
1910                                 while ((key = (TObjString*) iter.Next()))
1911                                         dcsMap->Add(key, aliasMap->GetValue(key->String()));
1912                                 
1913                                 aliasMap->SetOwner(kFALSE);
1914                                 delete aliasMap;
1915                         }       
1916                         
1917                         if(dpMap) {
1918                                 TIter iter(dpMap);
1919                                 TObjString* key = 0;
1920                                 while ((key = (TObjString*) iter.Next()))
1921                                         dcsMap->Add(key, dpMap->GetValue(key->String()));
1922                                 
1923                                 dpMap->SetOwner(kFALSE);
1924                                 delete dpMap;
1925                         }
1926                 }
1927         }
1928         
1929         // save map into file, to help debugging in case of preprocessor error
1930         TFile* f = TFile::Open("DCSMap.root","recreate");
1931         f->cd();
1932         dcsMap->Write("DCSMap", TObject::kSingleKey);
1933         f->Close();
1934         delete f;
1935         
1936         // DCS Archive DB processing successful. Call Preprocessor!
1937         UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
1938
1939         UInt_t returnValue = aPreprocessor->Process(dcsMap);
1940
1941         if (returnValue > 0) // Preprocessor error!
1942         {
1943                 Log(fCurrentDetector, Form("ProcessCurrentDetector - "
1944                                 "Preprocessor failed. Process returned %d.", returnValue));
1945                 UpdateShuttleStatus(AliShuttleStatus::kPPError);
1946                 dcsMap->DeleteAll();
1947                 delete dcsMap;
1948                 return kFALSE;
1949         }
1950         
1951         // preprocessor ok!
1952         UpdateShuttleStatus(AliShuttleStatus::kPPDone);
1953         Log(fCurrentDetector, Form("ProcessCurrentDetector - %s preprocessor returned success",
1954                                 fCurrentDetector.Data()));
1955
1956         dcsMap->DeleteAll();
1957         delete dcsMap;
1958
1959         return kTRUE;
1960 }
1961
1962 //______________________________________________________________________________________________
1963 void AliShuttle::CountOpenRuns()
1964 {
1965         // Query DAQ's Shuttle logbook and sends the number of open runs to ML
1966         
1967         // check connection, in case connect
1968         if (!Connect(3)) 
1969                 return;
1970
1971         TString sqlQuery;
1972         sqlQuery = Form("select count(*) from %s where shuttle_done=0", fConfig->GetShuttlelbTable());
1973         
1974         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
1975         if (!aResult) {
1976                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
1977                 return;
1978         }
1979
1980         AliDebug(2,Form("Query = %s", sqlQuery.Data()));
1981         
1982         if (aResult->GetRowCount() == 0) {
1983                 AliError(Form("No result for query %s received", sqlQuery.Data()));
1984                 return;
1985         }
1986
1987         if (aResult->GetFieldCount() != 1) {
1988                 AliError(Form("Invalid field count for query %s received", sqlQuery.Data()));
1989                 return;
1990         }
1991
1992         TSQLRow* aRow = aResult->Next();
1993         if (!aRow) {
1994                 AliError(Form("Could not receive result of query %s", sqlQuery.Data()));
1995                 return;
1996         }
1997         
1998         TString result(aRow->GetField(0), aRow->GetFieldLength(0));
1999         Int_t count = result.Atoi();
2000         
2001         Log("SHUTTLE", Form("%d unprocessed runs", count));
2002         
2003         delete aRow;
2004         delete aResult;
2005
2006         TMonaLisaValue mlStatus("SHUTTLE_openruns", count);
2007
2008         TList mlList;
2009         mlList.Add(&mlStatus);
2010
2011         fMonaLisa->SendParameters(&mlList, "__PROCESSINGINFO__");
2012 }
2013
2014 //______________________________________________________________________________________________
2015 Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
2016                 TObjArray& entries)
2017 {
2018         // Query DAQ's Shuttle logbook and fills detector status object.
2019         // Call QueryRunParameters to query DAQ logbook for run parameters.
2020         //
2021
2022         entries.SetOwner(1);
2023
2024         // check connection, in case connect
2025         if (!Connect(3)) return kFALSE;
2026
2027         TString sqlQuery;
2028         sqlQuery = Form("select * from %s %s order by run", fConfig->GetShuttlelbTable(), whereClause);
2029
2030         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
2031         if (!aResult) {
2032                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
2033                 return kFALSE;
2034         }
2035
2036         AliDebug(2,Form("Query = %s", sqlQuery.Data()));
2037
2038         if(aResult->GetRowCount() == 0) {
2039                 Log("SHUTTLE", "No entries in Shuttle Logbook match request");
2040                 delete aResult;
2041                 return kTRUE;
2042         }
2043
2044         // TODO Check field count!
2045         const UInt_t nCols = 23;
2046         if (aResult->GetFieldCount() != (Int_t) nCols) {
2047                 Log("SHUTTLE", "Invalid SQL result field number!");
2048                 delete aResult;
2049                 return kFALSE;
2050         }
2051
2052         TSQLRow* aRow;
2053         while ((aRow = aResult->Next())) {
2054                 TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
2055                 Int_t run = runString.Atoi();
2056
2057                 AliShuttleLogbookEntry *entry = QueryRunParameters(run);
2058                 if (!entry)
2059                         continue;
2060
2061                 // loop on detectors
2062                 for(UInt_t ii = 0; ii < nCols; ii++)
2063                         entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii));
2064
2065                 entries.AddLast(entry);
2066                 delete aRow;
2067         }
2068
2069         delete aResult;
2070         return kTRUE;
2071 }
2072
2073 //______________________________________________________________________________________________
2074 AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
2075 {
2076         //
2077         // Retrieve run parameters written in the DAQ logbook and sets them into AliShuttleLogbookEntry object
2078         //
2079
2080         // check connection, in case connect
2081         if (!Connect(3))
2082                 return 0;
2083
2084         TString sqlQuery;
2085         sqlQuery.Form("select * from %s where run=%d", fConfig->GetDAQlbTable(), run);
2086
2087         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
2088         if (!aResult) {
2089                 Log("SHUTTLE", Form("Can't execute query <%s>!", sqlQuery.Data()));
2090                 return 0;
2091         }
2092
2093         if (aResult->GetRowCount() == 0) {
2094                 Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run));
2095                 delete aResult;
2096                 return 0;
2097         }
2098
2099         if (aResult->GetRowCount() > 1) {
2100                 Log("SHUTTLE", Form("QueryRunParameters - UNEXPECTED: "
2101                                 "more than one entry in DAQ Logbook for run %d!", run));
2102                 delete aResult;
2103                 return 0;
2104         }
2105
2106         TSQLRow* aRow = aResult->Next();
2107         if (!aRow)
2108         {
2109                 Log("SHUTTLE", Form("QueryRunParameters - Could not retrieve row for run %d. Skipping", run));
2110                 delete aResult;
2111                 return 0;
2112         }
2113
2114         AliShuttleLogbookEntry* entry = new AliShuttleLogbookEntry(run);
2115
2116         for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
2117                 entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
2118
2119         UInt_t startTime = entry->GetStartTime();
2120         UInt_t endTime = entry->GetEndTime();
2121
2122 //      if (!startTime || !endTime || startTime > endTime) 
2123 //      {
2124 //              Log("SHUTTLE",
2125 //                      Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d. Skipping!",
2126 //                              run, startTime, endTime));              
2127 //              
2128 //              Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
2129 //              fLogbookEntry = entry;  
2130 //              if (!UpdateShuttleLogbook("shuttle_done"))
2131 //              {
2132 //                      AliError(Form("Could not update logbook for run %d !", run));
2133 //              }
2134 //              fLogbookEntry = 0;
2135 //                              
2136 //              delete entry;
2137 //              delete aRow;
2138 //              delete aResult;
2139 //              return 0;
2140 //      }
2141
2142         if (!startTime) 
2143         {
2144                 Log("SHUTTLE",
2145                         Form("QueryRunParameters - Invalid parameters for Run %d: " 
2146                                 "startTime = %d, endTime = %d. Skipping!",
2147                                         run, startTime, endTime));              
2148                 
2149                 Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
2150                 fLogbookEntry = entry;  
2151                 if (!UpdateShuttleLogbook("shuttle_ignored"))
2152                 {
2153                         AliError(Form("Could not update logbook for run %d !", run));
2154                 }
2155                 fLogbookEntry = 0;
2156                                 
2157                 delete entry;
2158                 delete aRow;
2159                 delete aResult;
2160                 return 0;
2161         }
2162         
2163         if (startTime && !endTime) 
2164         {
2165                 // TODO Here we don't mark SHUTTLE done, because this may mean 
2166                 //the run is still ongoing!!            
2167                 Log("SHUTTLE",
2168                         Form("QueryRunParameters - Invalid parameters for Run %d: "
2169                              "startTime = %d, endTime = %d. Skipping (Shuttle won't be marked as DONE)!",
2170                                         run, startTime, endTime));              
2171                 
2172                 //Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
2173                 //fLogbookEntry = entry;        
2174                 //if (!UpdateShuttleLogbook("shuttle_done"))
2175                 //{
2176                 //      AliError(Form("Could not update logbook for run %d !", run));
2177                 //}
2178                 //fLogbookEntry = 0;
2179                                 
2180                 delete entry;
2181                 delete aRow;
2182                 delete aResult;
2183                 return 0;
2184         }
2185                         
2186         if (startTime && endTime && (startTime > endTime)) 
2187         {
2188                 Log("SHUTTLE",
2189                         Form("QueryRunParameters - Invalid parameters for Run %d: "
2190                                 "startTime = %d, endTime = %d. Skipping!",
2191                                         run, startTime, endTime));              
2192                 
2193                 Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
2194                 fLogbookEntry = entry;  
2195                 if (!UpdateShuttleLogbook("shuttle_ignored"))
2196                 {
2197                         AliError(Form("Could not update logbook for run %d !", run));
2198                 }
2199                 fLogbookEntry = 0;
2200                                 
2201                 delete entry;
2202                 delete aRow;
2203                 delete aResult;
2204                 return 0;
2205         }
2206                         
2207         TString totEventsStr = entry->GetRunParameter("totalEvents");  
2208         Int_t totEvents = totEventsStr.Atoi();
2209         if (totEvents < 1) 
2210         {
2211                 Log("SHUTTLE",
2212                         Form("QueryRunParameters - Run %d has 0 events - Skipping!", run));             
2213                 
2214                 Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));           
2215                 fLogbookEntry = entry;  
2216                 if (!UpdateShuttleLogbook("shuttle_ignored"))
2217                 {
2218                         AliError(Form("Could not update logbook for run %d !", run));
2219                 }
2220                 fLogbookEntry = 0;
2221                                 
2222                 delete entry;
2223                 delete aRow;
2224                 delete aResult;
2225                 return 0;
2226         }
2227
2228         delete aRow;
2229         delete aResult;
2230
2231         return entry;
2232 }
2233
2234 //______________________________________________________________________________________________
2235 TMap* AliShuttle::GetValueSet(const char* host, Int_t port, const TSeqCollection* entries,
2236                               DCSType type, Int_t multiSplit)
2237 {
2238         // Retrieve all "entry" data points from the DCS server
2239         // host, port: TSocket connection parameters
2240         // entries: list of name of the alias or data point
2241         // type: kAlias or kDP
2242         // returns TMap of values, 0 when failure
2243         
2244         AliDCSClient client(host, port, fTimeout, fRetries, multiSplit);
2245
2246         TMap* result = 0;
2247         if (type == kAlias)
2248         {
2249                 result = client.GetAliasValues(entries, GetCurrentStartTime(), 
2250                         GetCurrentEndTime());
2251         } 
2252         else if (type == kDP)
2253         {
2254                 result = client.GetDPValues(entries, GetCurrentStartTime(), 
2255                         GetCurrentEndTime());
2256         }
2257
2258         if (result == 0)
2259         {
2260                 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get entries! Reason: %s",
2261                         client.GetErrorString(client.GetResultErrorCode())));
2262                 if (client.GetResultErrorCode() == AliDCSClient::fgkServerError)        
2263                         Log(fCurrentDetector.Data(), Form("GetValueSet - Server error code: %s",
2264                                 client.GetServerError().Data()));
2265
2266                 return 0;
2267         }
2268                 
2269         return result;
2270 }
2271
2272 //______________________________________________________________________________________________
2273 const char* AliShuttle::GetFile(Int_t system, const char* detector,
2274                 const char* id, const char* source)
2275 {
2276         // Get calibration file from file exchange servers
2277         // First queris the FXS database for the file name, using the run, detector, id and source info
2278         // then calls RetrieveFile(filename) for actual copy to local disk
2279         // run: current run being processed (given by Logbook entry fLogbookEntry)
2280         // detector: the Preprocessor name
2281         // id: provided as a parameter by the Preprocessor
2282         // source: provided by the Preprocessor through GetFileSources function
2283
2284         // check if test mode should simulate a FXS error
2285         if (fTestMode & kErrorFXSFiles)
2286         {
2287                 Log(detector, Form("GetFile - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2288                 return 0;
2289         }
2290         
2291         // check connection, in case connect
2292         if (!Connect(system))
2293         {
2294                 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
2295                 return 0;
2296         }
2297
2298         // Query preparation
2299         TString sourceName(source);
2300         Int_t nFields = 3;
2301         TString sqlQueryStart = Form("select filePath,size,fileChecksum from %s where",
2302                                                                 fConfig->GetFXSdbTable(system));
2303         TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
2304                                                                 GetCurrentRun(), detector, id);
2305
2306         if (system == kDAQ)
2307         {
2308                 whereClause += Form(" and DAQsource=\"%s\"", source);
2309         }
2310         else if (system == kDCS)
2311         {
2312                 sourceName="none";
2313         }
2314         else if (system == kHLT)
2315         {
2316                 whereClause += Form(" and DDLnumbers=\"%s\"", source);
2317                 nFields = 3;
2318         }
2319
2320         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2321
2322         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2323
2324         // Query execution
2325         TSQLResult* aResult = 0;
2326         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2327         if (!aResult) {
2328                 Log(detector, Form("GetFileName - Can't execute SQL query to %s database for: id = %s, source = %s",
2329                                 GetSystemName(system), id, sourceName.Data()));
2330                 return 0;
2331         }
2332
2333         if(aResult->GetRowCount() == 0)
2334         {
2335                 Log(detector,
2336                         Form("GetFileName - No entry in %s FXS db for: id = %s, source = %s",
2337                                 GetSystemName(system), id, sourceName.Data()));
2338                 delete aResult;
2339                 return 0;
2340         }
2341
2342         if (aResult->GetRowCount() > 1) {
2343                 Log(detector,
2344                         Form("GetFileName - More than one entry in %s FXS db for: id = %s, source = %s",
2345                                 GetSystemName(system), id, sourceName.Data()));
2346                 delete aResult;
2347                 return 0;
2348         }
2349
2350         if (aResult->GetFieldCount() != nFields) {
2351                 Log(detector,
2352                         Form("GetFileName - Wrong field count in %s FXS db for: id = %s, source = %s",
2353                                 GetSystemName(system), id, sourceName.Data()));
2354                 delete aResult;
2355                 return 0;
2356         }
2357
2358         TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
2359
2360         if (!aRow){
2361                 Log(detector, Form("GetFileName - Empty set result in %s FXS db from query: id = %s, source = %s",
2362                                 GetSystemName(system), id, sourceName.Data()));
2363                 delete aResult;
2364                 return 0;
2365         }
2366
2367         TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
2368         TString fileSize(aRow->GetField(1), aRow->GetFieldLength(1));
2369         TString fileChecksum(aRow->GetField(2), aRow->GetFieldLength(2));
2370
2371         delete aResult;
2372         delete aRow;
2373
2374         AliDebug(2, Form("filePath = %s; size = %s, fileChecksum = %s",
2375                                 filePath.Data(), fileSize.Data(), fileChecksum.Data()));
2376
2377         // retrieved file is renamed to make it unique
2378         TString localFileName = Form("%s/%s_%d_process/%s_%s_%d_%s_%s.shuttle",
2379                                         GetShuttleTempDir(), detector, GetCurrentRun(),
2380                                         GetSystemName(system), detector, GetCurrentRun(), 
2381                                         id, sourceName.Data());
2382
2383
2384         // file retrieval from FXS
2385         UInt_t nRetries = 0;
2386         UInt_t maxRetries = 3;
2387         Bool_t result = kFALSE;
2388
2389         // copy!! if successful TSystem::Exec returns 0
2390         while(nRetries++ < maxRetries) {
2391                 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
2392                 result = RetrieveFile(system, filePath.Data(), localFileName.Data());
2393                 if(!result)
2394                 {
2395                         Log(detector, Form("GetFileName - Copy of file %s from %s FXS failed",
2396                                         filePath.Data(), GetSystemName(system)));
2397                         continue;
2398                 } 
2399
2400                 if (fileChecksum.Length()>0)
2401                 {
2402                         // compare md5sum of local file with the one stored in the FXS DB
2403                         Int_t md5Comp = gSystem->Exec(Form("md5sum %s |grep %s 2>&1 > /dev/null",
2404                                                 localFileName.Data(), fileChecksum.Data()));
2405
2406                         if (md5Comp != 0)
2407                         {
2408                                 Log(detector, Form("GetFileName - md5sum of file %s does not match with local copy!",
2409                                                         filePath.Data()));
2410                                 result = kFALSE;
2411                                 continue;
2412                         }
2413                 } else {
2414                         Log(fCurrentDetector, Form("GetFile - md5sum of file %s not set in %s database, skipping comparison",
2415                                                         filePath.Data(), GetSystemName(system)));
2416                 }
2417                 if (result) break;
2418         }
2419
2420         if(!result) return 0;
2421
2422         fFXSCalled[system]=kTRUE;
2423         TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, sourceName.Data()));
2424         fFXSlist[system].Add(fileParams);
2425
2426         static TString staticLocalFileName;
2427         staticLocalFileName.Form("%s", localFileName.Data());
2428         
2429         Log(fCurrentDetector, Form("GetFile - Retrieved file with id %s and "
2430                         "source %s from %s to %s", id, source, 
2431                         GetSystemName(system), localFileName.Data()));
2432                         
2433         return staticLocalFileName.Data();
2434 }
2435
2436 //______________________________________________________________________________________________
2437 Bool_t AliShuttle::RetrieveFile(UInt_t system, const char* fxsFileName, const char* localFileName)
2438 {
2439         //
2440         // Copies file from FXS to local Shuttle machine
2441         //
2442
2443         // check temp directory: trying to cd to temp; if it does not exist, create it
2444         AliDebug(2, Form("Copy file %s from %s FXS into %s",
2445                         GetSystemName(system), fxsFileName, localFileName));
2446                         
2447         TString tmpDir(localFileName);
2448         
2449         tmpDir = tmpDir(0,tmpDir.Last('/'));
2450
2451         Int_t noDir = gSystem->GetPathInfo(tmpDir.Data(), 0, (Long64_t*) 0, 0, 0);
2452         if (noDir) // temp dir does not exists!
2453         {
2454                 if (gSystem->mkdir(tmpDir.Data(), 1))
2455                 {
2456                         Log(fCurrentDetector.Data(), "RetrieveFile - could not make temp directory!!");
2457                         return kFALSE;
2458                 }
2459         }
2460
2461         TString baseFXSFolder;
2462         if (system == kDAQ)
2463         {
2464                 baseFXSFolder = "FES/";
2465         }
2466         else if (system == kDCS)
2467         {
2468                 baseFXSFolder = "";
2469         }
2470         else if (system == kHLT)
2471         {
2472                 baseFXSFolder = "/opt/FXS/";
2473         }
2474
2475
2476         TString command = Form("scp -oPort=%d -2 %s@%s:%s%s %s",
2477                 fConfig->GetFXSPort(system),
2478                 fConfig->GetFXSUser(system),
2479                 fConfig->GetFXSHost(system),
2480                 baseFXSFolder.Data(),
2481                 fxsFileName,
2482                 localFileName);
2483
2484         AliDebug(2, Form("%s",command.Data()));
2485
2486         Bool_t result = (gSystem->Exec(command.Data()) == 0);
2487
2488         return result;
2489 }
2490
2491 //______________________________________________________________________________________________
2492 TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
2493 {
2494         //
2495         // Get sources producing the condition file Id from file exchange servers
2496         // if id is NULL all sources are returned (distinct)
2497         //
2498
2499         if (id)
2500         {
2501                 Log(detector, Form("GetFileSources - Querying %s FXS for files with id %s produced by %s", GetSystemName(system), id, detector));
2502         } else {
2503                 Log(detector, Form("GetFileSources - Querying %s FXS for files produced by %s", GetSystemName(system), detector));
2504         }
2505         
2506         // check if test mode should simulate a FXS error
2507         if (fTestMode & kErrorFXSSources)
2508         {
2509                 Log(detector, Form("GetFileSources - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2510                 return 0;
2511         }
2512
2513         if (system == kDCS)
2514         {
2515                 Log(detector, "GetFileSources - WARNING: DCS system has only one source of data!");
2516                 TList *list = new TList();
2517                 list->SetOwner(1);
2518                 list->Add(new TObjString(" "));
2519                 return list;
2520         }
2521
2522         // check connection, in case connect
2523         if (!Connect(system))
2524         {
2525                 Log(detector, Form("GetFileSources - Couldn't connect to %s FXS database", GetSystemName(system)));
2526                 return NULL;
2527         }
2528
2529         TString sourceName = 0;
2530         if (system == kDAQ)
2531         {
2532                 sourceName = "DAQsource";
2533         } else if (system == kHLT)
2534         {
2535                 sourceName = "DDLnumbers";
2536         }
2537
2538         TString sqlQueryStart = Form("select distinct %s from %s where", sourceName.Data(), fConfig->GetFXSdbTable(system));
2539         TString whereClause = Form("run=%d and detector=\"%s\"",
2540                                 GetCurrentRun(), detector);
2541         if (id)
2542                 whereClause += Form(" and fileId=\"%s\"", id);
2543         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2544
2545         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2546
2547         // Query execution
2548         TSQLResult* aResult;
2549         aResult = fServer[system]->Query(sqlQuery);
2550         if (!aResult) {
2551                 Log(detector, Form("GetFileSources - Can't execute SQL query to %s database for id: %s",
2552                                 GetSystemName(system), id));
2553                 return 0;
2554         }
2555
2556         TList *list = new TList();
2557         list->SetOwner(1);
2558         
2559         if (aResult->GetRowCount() == 0)
2560         {
2561                 Log(detector,
2562                         Form("GetFileSources - No entry in %s FXS table for id: %s", GetSystemName(system), id));
2563                 delete aResult;
2564                 return list;
2565         }
2566
2567         Log(detector, Form("GetFileSources - Found %d sources", aResult->GetRowCount()));
2568
2569         TSQLRow* aRow;
2570         while ((aRow = aResult->Next()))
2571         {
2572
2573                 TString source(aRow->GetField(0), aRow->GetFieldLength(0));
2574                 AliDebug(2, Form("%s = %s", sourceName.Data(), source.Data()));
2575                 list->Add(new TObjString(source));
2576                 delete aRow;
2577         }
2578
2579         delete aResult;
2580
2581         return list;
2582 }
2583
2584 //______________________________________________________________________________________________
2585 TList* AliShuttle::GetFileIDs(Int_t system, const char* detector, const char* source)
2586 {
2587         //
2588         // Get all ids of condition files produced by a given source from file exchange servers
2589         //
2590         
2591         Log(detector, Form("GetFileIDs - Retrieving ids with source %s with %s", source, GetSystemName(system)));
2592
2593         // check if test mode should simulate a FXS error
2594         if (fTestMode & kErrorFXSSources)
2595         {
2596                 Log(detector, Form("GetFileIDs - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2597                 return 0;
2598         }
2599
2600         // check connection, in case connect
2601         if (!Connect(system))
2602         {
2603                 Log(detector, Form("GetFileIDs - Couldn't connect to %s FXS database", GetSystemName(system)));
2604                 return NULL;
2605         }
2606
2607         TString sourceName = 0;
2608         if (system == kDAQ)
2609         {
2610                 sourceName = "DAQsource";
2611         } else if (system == kHLT)
2612         {
2613                 sourceName = "DDLnumbers";
2614         }
2615
2616         TString sqlQueryStart = Form("select fileId from %s where", fConfig->GetFXSdbTable(system));
2617         TString whereClause = Form("run=%d and detector=\"%s\"",
2618                                 GetCurrentRun(), detector);
2619         if (sourceName.Length() > 0 && source)
2620                 whereClause += Form(" and %s=\"%s\"", sourceName.Data(), source);
2621         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2622
2623         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2624
2625         // Query execution
2626         TSQLResult* aResult;
2627         aResult = fServer[system]->Query(sqlQuery);
2628         if (!aResult) {
2629                 Log(detector, Form("GetFileIDs - Can't execute SQL query to %s database for source: %s",
2630                                 GetSystemName(system), source));
2631                 return 0;
2632         }
2633
2634         TList *list = new TList();
2635         list->SetOwner(1);
2636         
2637         if (aResult->GetRowCount() == 0)
2638         {
2639                 Log(detector,
2640                         Form("GetFileIDs - No entry in %s FXS table for source: %s", GetSystemName(system), source));
2641                 delete aResult;
2642                 return list;
2643         }
2644
2645         Log(detector, Form("GetFileIDs - Found %d ids", aResult->GetRowCount()));
2646
2647         TSQLRow* aRow;
2648
2649         while ((aRow = aResult->Next()))
2650         {
2651
2652                 TString id(aRow->GetField(0), aRow->GetFieldLength(0));
2653                 AliDebug(2, Form("fileId = %s", id.Data()));
2654                 list->Add(new TObjString(id));
2655                 delete aRow;
2656         }
2657
2658         delete aResult;
2659
2660         return list;
2661 }
2662
2663 //______________________________________________________________________________________________
2664 Bool_t AliShuttle::Connect(Int_t system)
2665 {
2666         // Connect to MySQL Server of the system's FXS MySQL databases
2667         // DAQ Logbook, Shuttle Logbook and DAQ FXS db are on the same host
2668         //
2669
2670         // check connection: if already connected return
2671         if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
2672
2673         TString dbHost, dbUser, dbPass, dbName;
2674
2675         if (system < 3) // FXS db servers
2676         {
2677                 dbHost = Form("mysql://%s:%d", fConfig->GetFXSdbHost(system), fConfig->GetFXSdbPort(system));
2678                 dbUser = fConfig->GetFXSdbUser(system);
2679                 dbPass = fConfig->GetFXSdbPass(system);
2680                 dbName =   fConfig->GetFXSdbName(system);
2681         } else { // Run & Shuttle logbook servers
2682         // TODO Will the Shuttle logbook server be the same as the Run logbook server ???
2683                 dbHost = Form("mysql://%s:%d", fConfig->GetDAQlbHost(), fConfig->GetDAQlbPort());
2684                 dbUser = fConfig->GetDAQlbUser();
2685                 dbPass = fConfig->GetDAQlbPass();
2686                 dbName =   fConfig->GetDAQlbDB();
2687         }
2688
2689         fServer[system] = TSQLServer::Connect(dbHost.Data(), dbUser.Data(), dbPass.Data());
2690         if (!fServer[system] || !fServer[system]->IsConnected()) {
2691                 if(system < 3)
2692                 {
2693                 AliError(Form("Can't establish connection to FXS database for %s",
2694                                         AliShuttleInterface::GetSystemName(system)));
2695                 } else {
2696                 AliError("Can't establish connection to Run logbook.");
2697                 }
2698                 if(fServer[system]) delete fServer[system];
2699                 return kFALSE;
2700         }
2701
2702         // Get tables
2703         TSQLResult* aResult=0;
2704         switch(system){
2705                 case kDAQ:
2706                         aResult = fServer[kDAQ]->GetTables(dbName.Data());
2707                         break;
2708                 case kDCS:
2709                         aResult = fServer[kDCS]->GetTables(dbName.Data());
2710                         break;
2711                 case kHLT:
2712                         aResult = fServer[kHLT]->GetTables(dbName.Data());
2713                         break;
2714                 default:
2715                         aResult = fServer[3]->GetTables(dbName.Data());
2716                         break;
2717         }
2718
2719         delete aResult;
2720         return kTRUE;
2721 }
2722
2723 //______________________________________________________________________________________________
2724 Bool_t AliShuttle::UpdateTable()
2725 {
2726         //
2727         // Update FXS table filling time_processed field in all rows corresponding to current run and detector
2728         //
2729
2730         Bool_t result = kTRUE;
2731
2732         for (UInt_t system=0; system<3; system++)
2733         {
2734                 if(!fFXSCalled[system]) continue;
2735
2736                 // check connection, in case connect
2737                 if (!Connect(system))
2738                 {
2739                         Log(fCurrentDetector, Form("UpdateTable - Couldn't connect to %s FXS database", GetSystemName(system)));
2740                         result = kFALSE;
2741                         continue;
2742                 }
2743
2744                 TTimeStamp now; // now
2745
2746                 // Loop on FXS list entries
2747                 TIter iter(&fFXSlist[system]);
2748                 TObjString *aFXSentry=0;
2749                 while ((aFXSentry = dynamic_cast<TObjString*> (iter.Next())))
2750                 {
2751                         TString aFXSentrystr = aFXSentry->String();
2752                         TObjArray *aFXSarray = aFXSentrystr.Tokenize("#!?!#");
2753                         if (!aFXSarray || aFXSarray->GetEntries() != 2 )
2754                         {
2755                                 Log(fCurrentDetector, Form("UpdateTable - error updating %s FXS entry. Check string: <%s>",
2756                                         GetSystemName(system), aFXSentrystr.Data()));
2757                                 if(aFXSarray) delete aFXSarray;
2758                                 result = kFALSE;
2759                                 continue;
2760                         }
2761                         const char* fileId = ((TObjString*) aFXSarray->At(0))->GetName();
2762                         const char* source = ((TObjString*) aFXSarray->At(1))->GetName();
2763
2764                         TString whereClause;
2765                         if (system == kDAQ)
2766                         {
2767                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
2768                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
2769                         }
2770                         else if (system == kDCS)
2771                         {
2772                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\";",
2773                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId);
2774                         }
2775                         else if (system == kHLT)
2776                         {
2777                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\";",
2778                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
2779                         }
2780
2781                         delete aFXSarray;
2782
2783                         TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
2784                                                                 now.GetSec(), whereClause.Data());
2785
2786                         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2787
2788                         // Query execution
2789                         TSQLResult* aResult;
2790                         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2791                         if (!aResult)
2792                         {
2793                                 Log(fCurrentDetector, Form("UpdateTable - %s db: can't execute SQL query <%s>",
2794                                                                 GetSystemName(system), sqlQuery.Data()));
2795                                 result = kFALSE;
2796                                 continue;
2797                         }
2798                         delete aResult;
2799                 }
2800         }
2801
2802         return result;
2803 }
2804
2805 //______________________________________________________________________________________________
2806 Bool_t AliShuttle::UpdateTableFailCase()
2807 {
2808         // Update FXS table filling time_processed field in all rows corresponding to current run and detector
2809         // this is called in case the preprocessor is declared failed for the current run, because
2810         // the fields are updated only in case of success
2811
2812         Bool_t result = kTRUE;
2813
2814         for (UInt_t system=0; system<3; system++)
2815         {
2816                 // check connection, in case connect
2817                 if (!Connect(system))
2818                 {
2819                         Log(fCurrentDetector, Form("UpdateTableFailCase - Couldn't connect to %s FXS database",
2820                                                         GetSystemName(system)));
2821                         result = kFALSE;
2822                         continue;
2823                 }
2824
2825                 TTimeStamp now; // now
2826
2827                 // Loop on FXS list entries
2828
2829                 TString whereClause = Form("where run=%d and detector=\"%s\";",
2830                                                 GetCurrentRun(), fCurrentDetector.Data());
2831
2832
2833                 TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
2834                                                         now.GetSec(), whereClause.Data());
2835
2836                 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2837
2838                 // Query execution
2839                 TSQLResult* aResult;
2840                 aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2841                 if (!aResult)
2842                 {
2843                         Log(fCurrentDetector, Form("UpdateTableFailCase - %s db: can't execute SQL query <%s>",
2844                                                         GetSystemName(system), sqlQuery.Data()));
2845                         result = kFALSE;
2846                         continue;
2847                 }
2848                 delete aResult;
2849         }
2850
2851         return result;
2852 }
2853
2854 //______________________________________________________________________________________________
2855 Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
2856 {
2857         //
2858         // Update Shuttle logbook filling detector or shuttle_done column
2859         // ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
2860         //
2861
2862         // check connection, in case connect
2863         if(!Connect(3)){
2864                 Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook.");
2865                 return kFALSE;
2866         }
2867
2868         TString detName(detector);
2869         TString setClause;
2870         if (detName == "shuttle_done" || detName == "shuttle_ignored")
2871         {
2872                 setClause = "set shuttle_done=1";
2873
2874                 if (detName == "shuttle_done")
2875                 {
2876                         // Send the information to ML
2877                         TMonaLisaText  mlStatus("SHUTTLE_status", "Done");
2878
2879                         TList mlList;
2880                         mlList.Add(&mlStatus);
2881                 
2882                         TString mlID;
2883                         mlID.Form("%d", GetCurrentRun());
2884                         fMonaLisa->SendParameters(&mlList, mlID);
2885                 }
2886         } else {
2887                 TString statusStr(status);
2888                 if(statusStr.Contains("done", TString::kIgnoreCase) ||
2889                    statusStr.Contains("failed", TString::kIgnoreCase)){
2890                         setClause = Form("set %s=\"%s\"", detector, status);
2891                 } else {
2892                         Log("SHUTTLE",
2893                                 Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s",
2894                                         status, detector));
2895                         return kFALSE;
2896                 }
2897         }
2898
2899         TString whereClause = Form("where run=%d", GetCurrentRun());
2900
2901         TString sqlQuery = Form("update %s %s %s",
2902                                         fConfig->GetShuttlelbTable(), setClause.Data(), whereClause.Data());
2903
2904         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2905
2906         // Query execution
2907         TSQLResult* aResult;
2908         aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
2909         if (!aResult) {
2910                 Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data()));
2911                 return kFALSE;
2912         }
2913         delete aResult;
2914
2915         return kTRUE;
2916 }
2917
2918 //______________________________________________________________________________________________
2919 Int_t AliShuttle::GetCurrentRun() const
2920 {
2921         //
2922         // Get current run from logbook entry
2923         //
2924
2925         return fLogbookEntry ? fLogbookEntry->GetRun() : -1;
2926 }
2927
2928 //______________________________________________________________________________________________
2929 UInt_t AliShuttle::GetCurrentStartTime() const
2930 {
2931         //
2932         // get current start time
2933         //
2934
2935         return fLogbookEntry ? fLogbookEntry->GetStartTime() : 0;
2936 }
2937
2938 //______________________________________________________________________________________________
2939 UInt_t AliShuttle::GetCurrentEndTime() const
2940 {
2941         //
2942         // get current end time from logbook entry
2943         //
2944
2945         return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
2946 }
2947
2948 //______________________________________________________________________________________________
2949 UInt_t AliShuttle::GetCurrentYear() const
2950 {
2951         //
2952         // Get current year from logbook entry
2953         //
2954
2955         if (!fLogbookEntry) return 0;
2956         
2957         TTimeStamp startTime(GetCurrentStartTime());
2958         TString year =  Form("%d",startTime.GetDate());
2959         year = year(0,4);
2960         
2961         return year.Atoi();
2962 }
2963
2964 //______________________________________________________________________________________________
2965 const char* AliShuttle::GetLHCPeriod() const
2966 {
2967         //
2968         // Get current LHC period from logbook entry
2969         //
2970
2971         if (!fLogbookEntry) return 0;
2972                 
2973         return fLogbookEntry->GetRunParameter("LHCperiod");
2974 }
2975
2976 //______________________________________________________________________________________________
2977 void AliShuttle::Log(const char* detector, const char* message)
2978 {
2979         //
2980         // Fill log string with a message
2981         //
2982
2983         TString logRunDir = GetShuttleLogDir();
2984         if (GetCurrentRun() >=0)
2985                 logRunDir += Form("/%d", GetCurrentRun());
2986         
2987         void* dir = gSystem->OpenDirectory(logRunDir.Data());
2988         if (dir == NULL) {
2989                 if (gSystem->mkdir(logRunDir.Data(), kTRUE)) {
2990                         AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
2991                         return;
2992                 }
2993
2994         } else {
2995                 gSystem->FreeDirectory(dir);
2996         }
2997
2998         TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
2999         if (GetCurrentRun() >= 0) 
3000                 toLog += Form("run %d - ", GetCurrentRun());
3001         toLog += Form("%s", message);
3002
3003         AliInfo(toLog.Data());
3004         
3005         // if we redirect the log output already to the file, leave here
3006         if (fOutputRedirected && strcmp(detector, "SHUTTLE") != 0)
3007                 return;
3008
3009         TString fileName = GetLogFileName(detector);
3010         
3011         gSystem->ExpandPathName(fileName);
3012
3013         ofstream logFile;
3014         logFile.open(fileName, ofstream::out | ofstream::app);
3015
3016         if (!logFile.is_open()) {
3017                 AliError(Form("Could not open file %s", fileName.Data()));
3018                 return;
3019         }
3020
3021         logFile << toLog.Data() << "\n";
3022
3023         logFile.close();
3024 }
3025
3026 //______________________________________________________________________________________________
3027 TString AliShuttle::GetLogFileName(const char* detector) const
3028 {
3029         // 
3030         // returns the name of the log file for a given sub detector
3031         //
3032         
3033         TString fileName;
3034         
3035         if (GetCurrentRun() >= 0) 
3036         {
3037                 fileName.Form("%s/%d/%s_%d.log", GetShuttleLogDir(), GetCurrentRun(), 
3038                         detector, GetCurrentRun());
3039         } else {
3040                 fileName.Form("%s/%s.log", GetShuttleLogDir(), detector);
3041         }
3042
3043         return fileName;
3044 }
3045
3046 //______________________________________________________________________________________________
3047 void AliShuttle::SendAlive()
3048 {
3049         // sends alive message to ML
3050         
3051         TMonaLisaText mlStatus("SHUTTLE_status", "Alive");
3052
3053         TList mlList;
3054         mlList.Add(&mlStatus);
3055
3056         fMonaLisa->SendParameters(&mlList, "__PROCESSINGINFO__");
3057 }
3058
3059 //______________________________________________________________________________________________
3060 Bool_t AliShuttle::Collect(Int_t run)
3061 {
3062         //
3063         // Collects conditions data for all UNPROCESSED run written to DAQ LogBook in case of run = -1 (default)
3064         // If a dedicated run is given this run is processed
3065         //
3066         // In operational mode, this is the Shuttle function triggered by the EOR signal.
3067         //
3068
3069         if (run == -1)
3070                 Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs");
3071         else
3072                 Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run));
3073
3074         SetLastAction("Starting");
3075
3076         // create ML instance
3077         if (!fMonaLisa)
3078                 fMonaLisa = new TMonaLisaWriter(fConfig->GetMonitorHost(), fConfig->GetMonitorTable());
3079                 
3080         SendAlive();
3081         CountOpenRuns();
3082
3083         TString whereClause("where shuttle_done=0");
3084         if (run != -1)
3085                 whereClause += Form(" and run=%d", run);
3086
3087         TObjArray shuttleLogbookEntries;
3088         if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries))
3089         {
3090                 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
3091                 return kFALSE;
3092         }
3093
3094         if (shuttleLogbookEntries.GetEntries() == 0)
3095         {
3096                 if (run == -1)
3097                         Log("SHUTTLE","Collect - Found no UNPROCESSED runs in Shuttle logbook");
3098                 else
3099                         Log("SHUTTLE", Form("Collect - Run %d is already DONE "
3100                                                 "or it does not exist in Shuttle logbook", run));
3101                 return kTRUE;
3102         }
3103
3104         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
3105                 fFirstUnprocessed[iDet] = kTRUE;
3106
3107         if (run != -1)
3108         {
3109                 // query Shuttle logbook for earlier runs, check if some detectors are unprocessed,
3110                 // flag them into fFirstUnprocessed array
3111                 TString whereClause(Form("where shuttle_done=0 and run < %d", run));
3112                 TObjArray tmpLogbookEntries;
3113                 if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries))
3114                 {
3115                         Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
3116                         return kFALSE;
3117                 }
3118
3119                 TIter iter(&tmpLogbookEntries);
3120                 AliShuttleLogbookEntry* anEntry = 0;
3121                 while ((anEntry = dynamic_cast<AliShuttleLogbookEntry*> (iter.Next())))
3122                 {
3123                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
3124                         {
3125                                 if (anEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
3126                                 {
3127                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
3128                                                         anEntry->GetRun(), GetDetName(iDet)));
3129                                         fFirstUnprocessed[iDet] = kFALSE;
3130                                 }
3131                         }
3132
3133                 }
3134
3135         }
3136
3137         if (!RetrieveConditionsData(shuttleLogbookEntries))
3138         {
3139                 Log("SHUTTLE", "Collect - Process of at least one run failed");
3140                 return kFALSE;
3141         }
3142
3143         Log("SHUTTLE", "Collect - Requested run(s) successfully processed");
3144         return kTRUE;
3145 }
3146
3147 //______________________________________________________________________________________________
3148 Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries)
3149 {
3150         //
3151         // Retrieve conditions data for all runs that aren't processed yet
3152         //
3153
3154         Bool_t hasError = kFALSE;
3155
3156         TIter iter(&dateEntries);
3157         AliShuttleLogbookEntry* anEntry;
3158
3159         while ((anEntry = (AliShuttleLogbookEntry*) iter.Next())){
3160                 if (!Process(anEntry)){
3161                         hasError = kTRUE;
3162                 }
3163
3164                 // clean SHUTTLE temp directory
3165                 //TString filename = Form("%s/*.shuttle", GetShuttleTempDir());
3166                 //RemoveFile(filename.Data());
3167         }
3168
3169         return hasError == kFALSE;
3170 }
3171
3172 //______________________________________________________________________________________________
3173 ULong_t AliShuttle::GetTimeOfLastAction() const
3174 {
3175         //
3176         // Gets time of last action
3177         //
3178
3179         ULong_t tmp;
3180
3181         fMonitoringMutex->Lock();
3182
3183         tmp = fLastActionTime;
3184
3185         fMonitoringMutex->UnLock();
3186
3187         return tmp;
3188 }
3189
3190 //______________________________________________________________________________________________
3191 const TString AliShuttle::GetLastAction() const
3192 {
3193         //
3194         // returns a string description of the last action
3195         //
3196
3197         TString tmp;
3198
3199         fMonitoringMutex->Lock();
3200         
3201         tmp = fLastAction;
3202         
3203         fMonitoringMutex->UnLock();
3204
3205         return tmp;
3206 }
3207
3208 //______________________________________________________________________________________________
3209 void AliShuttle::SetLastAction(const char* action)
3210 {
3211         //
3212         // updates the monitoring variables
3213         //
3214
3215         fMonitoringMutex->Lock();
3216
3217         fLastAction = action;
3218         fLastActionTime = time(0);
3219         
3220         fMonitoringMutex->UnLock();
3221 }
3222
3223 //______________________________________________________________________________________________
3224 const char* AliShuttle::GetRunParameter(const char* param)
3225 {
3226         //
3227         // returns run parameter read from DAQ logbook
3228         //
3229
3230         if(!fLogbookEntry) {
3231                 AliError("No logbook entry!");
3232                 return 0;
3233         }
3234
3235         return fLogbookEntry->GetRunParameter(param);
3236 }
3237
3238 //______________________________________________________________________________________________
3239 AliCDBEntry* AliShuttle::GetFromOCDB(const char* detector, const AliCDBPath& path)
3240 {
3241         //
3242         // returns object from OCDB valid for current run
3243         //
3244
3245         if (fTestMode & kErrorOCDB)
3246         {
3247                 Log(detector, "GetFromOCDB - In TESTMODE - Simulating error with OCDB");
3248                 return 0;
3249         }
3250         
3251         AliCDBStorage *sto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
3252         if (!sto)
3253         {
3254                 Log(detector, "GetFromOCDB - Cannot activate main OCDB for query!");
3255                 return 0;
3256         }
3257
3258         return dynamic_cast<AliCDBEntry*> (sto->Get(path, GetCurrentRun()));
3259 }
3260
3261 //______________________________________________________________________________________________
3262 Bool_t AliShuttle::SendMail()
3263 {
3264         //
3265         // sends a mail to the subdetector expert in case of preprocessor error
3266         //
3267         
3268         if (fTestMode != kNone)
3269                 return kTRUE;
3270
3271         TString to="";
3272         TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
3273         TObjString *anExpert=0;
3274         while ((anExpert = (TObjString*) iterExperts.Next()))
3275         {
3276                 to += Form("%s,", anExpert->GetName());
3277         }
3278         if (to.Length() > 0)
3279           to.Remove(to.Length()-1);
3280         AliDebug(2, Form("to: %s",to.Data()));
3281
3282         if (to.IsNull()) {
3283                 Log("SHUTTLE", "List of detector responsibles not yet set!");
3284                 return kFALSE;
3285         }
3286
3287         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
3288         if (dir == NULL)
3289         {
3290                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
3291                 {
3292                         Log("SHUTTLE", Form("SendMail - Can't open directory <%s>", GetShuttleLogDir()));
3293                         return kFALSE;
3294                 }
3295
3296         } else {
3297                 gSystem->FreeDirectory(dir);
3298         }
3299
3300         TString bodyFileName;
3301         bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
3302         gSystem->ExpandPathName(bodyFileName);
3303
3304         ofstream mailBody;
3305         mailBody.open(bodyFileName, ofstream::out);
3306
3307         if (!mailBody.is_open())
3308         {
3309                 Log("SHUTTLE", Form("Could not open mail body file %s", bodyFileName.Data()));
3310                 return kFALSE;
3311         }
3312
3313         TString cc="alberto.colla@cern.ch";
3314
3315         TString subject = Form("%s Shuttle preprocessor FAILED in run %d (run type = %s)!",
3316                                 fCurrentDetector.Data(), GetCurrentRun(), GetRunType());
3317         AliDebug(2, Form("subject: %s", subject.Data()));
3318
3319         TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
3320         body += Form("SHUTTLE just detected that your preprocessor "
3321                         "failed processing run %d (run type = %s)!!\n\n", 
3322                                         GetCurrentRun(), GetRunType());
3323         body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n", 
3324                                 fCurrentDetector.Data());
3325         if (fConfig->GetRunMode() == AliShuttleConfig::kTest)
3326         {
3327                 body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
3328         } else {
3329                 body += Form("\thttp://pcalimonitor.cern.ch/shuttle.jsp?instance=PROD&time=168 \n\n");
3330         }
3331         
3332         
3333         TString logFolder = "logs";
3334         if (fConfig->GetRunMode() == AliShuttleConfig::kProd) 
3335                 logFolder += "_PROD";
3336         
3337         
3338         body += Form("Find the %s log for the current run on \n\n"
3339                 "\thttp://pcalishuttle01.cern.ch:8880/%s/%d/%s_%d.log \n\n", 
3340                 fCurrentDetector.Data(), logFolder.Data(), GetCurrentRun(), 
3341                                 fCurrentDetector.Data(), GetCurrentRun());
3342         body += Form("The last 10 lines of %s log file are following:\n\n", fCurrentDetector.Data());
3343
3344         AliDebug(2, Form("Body begin: %s", body.Data()));
3345
3346         mailBody << body.Data();
3347         mailBody.close();
3348         mailBody.open(bodyFileName, ofstream::out | ofstream::app);
3349
3350         TString logFileName = Form("%s/%d/%s_%d.log", GetShuttleLogDir(), 
3351                 GetCurrentRun(), fCurrentDetector.Data(), GetCurrentRun());
3352         TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
3353         if (gSystem->Exec(tailCommand.Data()))
3354         {
3355                 mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
3356         }
3357
3358         TString endBody = Form("------------------------------------------------------\n\n");
3359         endBody += Form("In case of problems please contact the SHUTTLE core team.\n\n");
3360         endBody += "Please do not answer this message directly, it is automatically generated.\n\n";
3361         endBody += "Greetings,\n\n \t\t\tthe SHUTTLE\n";
3362
3363         AliDebug(2, Form("Body end: %s", endBody.Data()));
3364
3365         mailBody << endBody.Data();
3366
3367         mailBody.close();
3368
3369         // send mail!
3370         TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
3371                                                 subject.Data(),
3372                                                 cc.Data(),
3373                                                 to.Data(),
3374                                                 bodyFileName.Data());
3375         AliDebug(2, Form("mail command: %s", mailCommand.Data()));
3376
3377         Bool_t result = gSystem->Exec(mailCommand.Data());
3378
3379         return result == 0;
3380 }
3381
3382 //______________________________________________________________________________________________
3383 Bool_t AliShuttle::SendMailToDCS()
3384 {
3385         //
3386         // sends a mail to the DCS experts in case of DCS error
3387         //
3388         
3389         if (fTestMode != kNone)
3390                 return kTRUE;
3391
3392         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
3393         if (dir == NULL)
3394         {
3395                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
3396                 {
3397                         Log("SHUTTLE", Form("SendMailToDCS - Can't open directory <%s>", GetShuttleLogDir()));
3398                         return kFALSE;
3399                 }
3400
3401         } else {
3402                 gSystem->FreeDirectory(dir);
3403         }
3404
3405         TString bodyFileName;
3406         bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
3407         gSystem->ExpandPathName(bodyFileName);
3408
3409         ofstream mailBody;
3410         mailBody.open(bodyFileName, ofstream::out);
3411
3412         if (!mailBody.is_open())
3413         {
3414                 Log("SHUTTLE", Form("SendMailToDCS - Could not open mail body file %s", bodyFileName.Data()));
3415                 return kFALSE;
3416         }
3417
3418         TString to="Vladimir.Fekete@cern.ch, Svetozar.Kapusta@cern.ch";
3419         //TString to="alberto.colla@cern.ch";
3420         AliDebug(2, Form("to: %s",to.Data()));
3421
3422         if (to.IsNull()) {
3423                 Log("SHUTTLE", "List of detector responsibles not yet set!");
3424                 return kFALSE;
3425         }
3426
3427         TString cc="alberto.colla@cern.ch";
3428
3429         TString subject = Form("Retrieval of data points for %s FAILED in run %d !",
3430                                 fCurrentDetector.Data(), GetCurrentRun());
3431         AliDebug(2, Form("subject: %s", subject.Data()));
3432
3433         TString body = Form("Dear DCS experts, \n\n");
3434         body += Form("SHUTTLE couldn\'t retrieve the data points for detector %s "
3435                         "in run %d!!\n\n", fCurrentDetector.Data(), GetCurrentRun());
3436         body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n", 
3437                                 fCurrentDetector.Data());
3438         if (fConfig->GetRunMode() == AliShuttleConfig::kTest)
3439         {
3440                 body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
3441         } else {
3442                 body += Form("\thttp://pcalimonitor.cern.ch/shuttle.jsp?instance=PROD?time=168 \n\n");
3443         }
3444
3445         TString logFolder = "logs";
3446         if (fConfig->GetRunMode() == AliShuttleConfig::kProd) 
3447                 logFolder += "_PROD";
3448         
3449         
3450         body += Form("Find the %s log for the current run on \n\n"
3451                 "\thttp://pcalishuttle01.cern.ch:8880/%s/%d/%s_%d.log \n\n", 
3452                 fCurrentDetector.Data(), logFolder.Data(), GetCurrentRun(), 
3453                                 fCurrentDetector.Data(), GetCurrentRun());
3454         body += Form("The last 10 lines of %s log file are following:\n\n", fCurrentDetector.Data());
3455
3456         AliDebug(2, Form("Body begin: %s", body.Data()));
3457
3458         mailBody << body.Data();
3459         mailBody.close();
3460         mailBody.open(bodyFileName, ofstream::out | ofstream::app);
3461
3462         TString logFileName = Form("%s/%d/%s_%d.log", GetShuttleLogDir(), GetCurrentRun(),
3463                 fCurrentDetector.Data(), GetCurrentRun());
3464         TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
3465         if (gSystem->Exec(tailCommand.Data()))
3466         {
3467                 mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
3468         }
3469
3470         TString endBody = Form("------------------------------------------------------\n\n");
3471         endBody += Form("In case of problems please contact the SHUTTLE core team.\n\n");
3472         endBody += "Please do not answer this message directly, it is automatically generated.\n\n";
3473         endBody += "Greetings,\n\n \t\t\tthe SHUTTLE\n";
3474
3475         AliDebug(2, Form("Body end: %s", endBody.Data()));
3476
3477         mailBody << endBody.Data();
3478
3479         mailBody.close();
3480
3481         // send mail!
3482         TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
3483                                                 subject.Data(),
3484                                                 cc.Data(),
3485                                                 to.Data(),
3486                                                 bodyFileName.Data());
3487         AliDebug(2, Form("mail command: %s", mailCommand.Data()));
3488
3489         Bool_t result = gSystem->Exec(mailCommand.Data());
3490
3491         return result == 0;
3492 }
3493
3494 //______________________________________________________________________________________________
3495 const char* AliShuttle::GetRunType()
3496 {
3497         //
3498         // returns run type read from "run type" logbook
3499         //
3500
3501         if(!fLogbookEntry) {
3502                 AliError("No logbook entry!");
3503                 return 0;
3504         }
3505
3506         return fLogbookEntry->GetRunType();
3507 }
3508
3509 //______________________________________________________________________________________________
3510 Bool_t AliShuttle::GetHLTStatus()
3511 {
3512         // Return HLT status (ON=1 OFF=0)
3513         // Converts the HLT status from the status string read in the run logbook (not just a bool)
3514
3515         if(!fLogbookEntry) {
3516                 AliError("No logbook entry!");
3517                 return 0;
3518         }
3519
3520         // TODO implement when HLTStatus is inserted in run logbook
3521         //TString hltStatus = fLogbookEntry->GetRunParameter("HLTStatus");
3522         //if(hltStatus == "OFF") {return kFALSE};
3523
3524         return kTRUE;
3525 }
3526
3527 //______________________________________________________________________________________________
3528 void AliShuttle::SetShuttleTempDir(const char* tmpDir)
3529 {
3530         //
3531         // sets Shuttle temp directory
3532         //
3533
3534         fgkShuttleTempDir = gSystem->ExpandPathName(tmpDir);
3535 }
3536
3537 //______________________________________________________________________________________________
3538 void AliShuttle::SetShuttleLogDir(const char* logDir)
3539 {
3540         //
3541         // sets Shuttle log directory
3542         //
3543
3544         fgkShuttleLogDir = gSystem->ExpandPathName(logDir);
3545 }