]> git.uio.no Git - u/mrichter/AliRoot.git/blob - SHUTTLE/AliShuttle.cxx
Fixes to AliFMDPreprocessor from Hans.
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttle.cxx
1 /**************************************************************************
2  * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
3  *                                                                        *
4  * Author: The ALICE Off-line Project.                                    *
5  * Contributors are mentioned in the code where appropriate.              *
6  *                                                                        *
7  * Permission to use, copy, modify and distribute this software and its   *
8  * documentation strictly for non-commercial purposes is hereby granted   *
9  * without fee, provided that the above copyright notice appears in all   *
10  * copies and that both the copyright notice and this permission notice   *
11  * appear in the supporting documentation. The authors make no claims     *
12  * about the suitability of this software for any purpose. It is          *
13  * provided "as is" without express or implied warranty.                  *
14  **************************************************************************/
15
16 /*
17 $Log$
18 Revision 1.80  2007/12/20 13:31:28  acolla
19 Bug fix (Jan Fiete): recovering from StoreError, if the store to OCDB is successful,
20 the Shuttle sets current detector's status=done
21
22 Revision 1.79  2007/12/19 14:03:01  acolla
23
24 detector name to build the lhcPeriod_DET is to be looked in "detector" column, not "partition"
25
26 Revision 1.78  2007/12/19 11:50:41  acolla
27
28 Raw data tag merged files is written in /alice/data/.../lhcPeriod_DET/runNb/raw if partition is made of DET only
29
30 Revision 1.77  2007/12/19 11:16:16  acolla
31 More meaningful log message added in GetFileSources
32
33 Revision 1.76  2007/12/19 07:45:20  acolla
34 bug fix in the name of the raw tag files (Raw instead of raw)
35
36 Revision 1.75  2007/12/18 15:42:14  jgrosseo
37 adding number of open runs to monitoring
38
39 Revision 1.74  2007/12/17 03:23:32  jgrosseo
40 several bugfixes
41 added "empty preprocessor" as placeholder for Acorde in FDR
42
43 Revision 1.73  2007/12/14 19:31:36  acolla
44 Sending email to DCS experts is temporarily commented
45
46 Revision 1.72  2007/12/13 15:44:28  acolla
47 Run type added in mail sent to detector expert (eases understanding)
48
49 Revision 1.71  2007/12/12 14:56:14  jgrosseo
50 sending shuttle_ignore to ML also in case of 0 events
51
52 Revision 1.70  2007/12/12 13:45:35  acolla
53 Monalisa started in Collect() function. Alive message to monitor is sent at each Collect and every minute during preprocessor processing.
54
55 Revision 1.69  2007/12/12 10:06:29  acolla
56 in AliShuttle.cxx: SHUTTLE logbook is updated in case of invalid run times:
57
58 time_start==0 && time_end==0
59
60 logbook is NOT updated if time_start != 0 && time_end == 0, because it may mean that the run is still ongoing.
61
62 Revision 1.68  2007/12/11 10:15:17  acolla
63 Added marking SHUTTLE=DONE for invalid runs
64 (invalid start time or end time) and runs with totalEvents < 1
65
66 Revision 1.67  2007/12/07 19:14:36  acolla
67 in AliShuttleTrigger:
68
69 Added automatic collection of new runs on a regular time basis (settable from the configuration)
70
71 in AliShuttleConfig: new members
72
73 - triggerWait: time to wait for DIM trigger (s) before starting automatic collection of new runs
74 - mode: run mode (test, prod) -> used to build log folder (logs or logs_PROD)
75
76 in AliShuttle:
77
78 - logs now stored in logs/#RUN/DET_#RUN.log
79
80 Revision 1.66  2007/12/05 10:45:19  jgrosseo
81 changed order of arguments to TMonaLisaWriter
82
83 Revision 1.65  2007/11/26 16:58:37  acolla
84 Monalisa configuration added: host and table name
85
86 Revision 1.64  2007/11/13 16:15:47  acolla
87 DCS map is stored in a file in the temp folder where the detector is processed.
88 If the preprocessor fails, the temp folder is not removed. This will help the debugging of the problem.
89
90 Revision 1.63  2007/11/02 10:53:16  acolla
91 Protection added to AliShuttle::CopyFileLocally
92
93 Revision 1.62  2007/10/31 18:23:13  acolla
94 Furter developement on the Shuttle:
95
96 - Shuttle now connects to the Grid as alidaq. The OCDB and Reference folders
97 are now built from /alice/data, e.g.:
98 /alice/data/2007/LHC07a/OCDB
99
100 the year and LHC period are taken from the Shuttle.
101 Raw metadata files are stored by GRP to:
102 /alice/data/2007/LHC07a/<runNb>/Raw/RunMetadata.root
103
104 - Shuttle sends a mail to DCS experts each time DP retrieval fails.
105
106 Revision 1.61  2007/10/30 20:33:51  acolla
107 Improved managing of temporary folders, which weren't correctly handled.
108 Resolved bug introduced in StoreReferenceFile, which caused SPD preprocessor fail.
109
110 Revision 1.60  2007/10/29 18:06:16  acolla
111
112 New function StoreRunMetadataFile added to preprocessor and Shuttle interface
113 This function can be used by GRP only. It stores raw data tags merged file to the
114 raw data folder (e.g. /alice/data/2008/LHC08a/000099999/Raw).
115
116 KNOWN ISSUES:
117
118 1. Shuttle cannot write to /alice/data/ because it belongs to alidaq. Tag file is stored in /alice/simulation/... for the time being.
119 2. Due to a bug in TAlien::Mkdir, the creation of a folder in recursive mode (-p option) does not work. The problem
120 has been corrected in the root package on the Shuttle machine.
121
122 Revision 1.59  2007/10/05 12:40:55  acolla
123
124 Result error code added to AliDCSClient data members (it was "lost" with the new implementation of TMap* GetAliasValues and GetDPValues).
125
126 Revision 1.58  2007/09/28 15:27:40  acolla
127
128 AliDCSClient "multiSplit" option added in the DCS configuration
129 in AliDCSMessage: variable MAX_BODY_SIZE set to 500000
130
131 Revision 1.57  2007/09/27 16:53:13  acolla
132 Detectors can have more than one AMANDA server. SHUTTLE queries the servers sequentially,
133 merges the dcs aliases/DPs in one TMap and sends it to the preprocessor.
134
135 Revision 1.56  2007/09/14 16:46:14  jgrosseo
136 1) Connect and Close are called before and after each query, so one can
137 keep the same AliDCSClient object.
138 2) The splitting of a query is moved to GetDPValues/GetAliasValues.
139 3) Splitting interval can be specified in constructor
140
141 Revision 1.55  2007/08/06 12:26:40  acolla
142 Function Bool_t GetHLTStatus added to preprocessor. It returns the status of HLT
143 read from the run logbook.
144
145 Revision 1.54  2007/07/12 09:51:25  jgrosseo
146 removed duplicated log message in GetFile
147
148 Revision 1.53  2007/07/12 09:26:28  jgrosseo
149 updating hlt fxs base path
150
151 Revision 1.52  2007/07/12 08:06:45  jgrosseo
152 adding log messages in getfile... functions
153 adding not implemented copy constructor in alishuttleconfigholder
154
155 Revision 1.51  2007/07/03 17:24:52  acolla
156 root moved to v5-16-00. TFileMerger->Cp moved to TFile::Cp.
157
158 Revision 1.50  2007/07/02 17:19:32  acolla
159 preprocessor is run in a temp directory that is removed when process is finished.
160
161 Revision 1.49  2007/06/29 10:45:06  acolla
162 Number of columns in MySql Shuttle logbook increased by one (HLT added)
163
164 Revision 1.48  2007/06/21 13:06:19  acolla
165 GetFileSources returns dummy list with 1 source if system=DCS (better than
166 returning error as it was)
167
168 Revision 1.47  2007/06/19 17:28:56  acolla
169 HLT updated; missing map bug removed.
170
171 Revision 1.46  2007/06/09 13:01:09  jgrosseo
172 Switching to retrieval of several DCS DPs at a time (multiDPrequest)
173
174 Revision 1.45  2007/05/30 06:35:20  jgrosseo
175 Adding functionality to the Shuttle/TestShuttle:
176 o) Function to retrieve list of sources from a given system (GetFileSources with id=0)
177 o) Function to retrieve list of IDs for a given source      (GetFileIDs)
178 These functions are needed for dealing with the tag files that are saved for the GRP preprocessor
179 Example code has been added to the TestProcessor in TestShuttle
180
181 Revision 1.44  2007/05/11 16:09:32  acolla
182 Reference files for ITS, MUON and PHOS are now stored in OfflineDetName/OnlineDetName/run_...
183 example: ITS/SPD/100_filename.root
184
185 Revision 1.43  2007/05/10 09:59:51  acolla
186 Various bug fixes in StoreRefFilesToGrid; Cleaning of reference storage before processing detector (CleanReferenceStorage)
187
188 Revision 1.42  2007/05/03 08:01:39  jgrosseo
189 typo in last commit :-(
190
191 Revision 1.41  2007/05/03 08:00:48  jgrosseo
192 fixing log message when pp want to skip dcs value retrieval
193
194 Revision 1.40  2007/04/27 07:06:48  jgrosseo
195 GetFileSources returns empty list in case of no files, but successful query
196 No mails sent in testmode
197
198 Revision 1.39  2007/04/17 12:43:57  acolla
199 Correction in StoreOCDB; change of text in mail to detector expert
200
201 Revision 1.38  2007/04/12 08:26:18  jgrosseo
202 updated comment
203
204 Revision 1.37  2007/04/10 16:53:14  jgrosseo
205 redirecting sub detector stdout, stderr to sub detector log file
206
207 Revision 1.35  2007/04/04 16:26:38  acolla
208 1. Re-organization of function calls in TestPreprocessor to make it more meaningful.
209 2. Added missing dependency in test preprocessors.
210 3. in AliShuttle.cxx: processing time and memory consumption info on a single line.
211
212 Revision 1.34  2007/04/04 10:33:36  jgrosseo
213 1) Storing of files to the Grid is now done _after_ your preprocessors succeeded. This is transparent, which means that you can still use the same functions (Store, StoreReferenceData) to store files to the Grid. However, the Shuttle first stores them locally and transfers them after the preprocessor finished. The return code of these two functions has changed from UInt_t to Bool_t which gives you the success of the storing.
214 In case of an error with the Grid, the Shuttle will retry the storing later, the preprocessor does not need to be run again.
215
216 2) The meaning of the return code of the preprocessor has changed. 0 is now success and any other value means failure. This value is stored in the log and you can use it to keep details about the error condition.
217
218 3) New function StoreReferenceFile to _directly_ store a file (without opening it) to the reference storage.
219
220 4) The memory usage of the preprocessor is monitored. If it exceeds 2 GB it is terminated.
221
222 5) New function AliPreprocessor::ProcessDCS(). If you do not need to have DCS data in all cases, you can skip the processing by implemting this function and returning kFALSE under certain conditions. E.g. if there is a certain run type.
223 If you always need DCS data (like before), you do not need to implement it.
224
225 6) The run type has been added to the monitoring page
226
227 Revision 1.33  2007/04/03 13:56:01  acolla
228 Grid Storage at the end of preprocessing. Added virtual method to disable DCS query according to the
229 run type.
230
231 Revision 1.32  2007/02/28 10:41:56  acolla
232 Run type field added in SHUTTLE framework. Run type is read from "run type" logbook and retrieved by
233 AliPreprocessor::GetRunType() function.
234 Added some ldap definition files.
235
236 Revision 1.30  2007/02/13 11:23:21  acolla
237 Moved getters and setters of Shuttle's main OCDB/Reference, local
238 OCDB/Reference, temp and log folders to AliShuttleInterface
239
240 Revision 1.27  2007/01/30 17:52:42  jgrosseo
241 adding monalisa monitoring
242
243 Revision 1.26  2007/01/23 19:20:03  acolla
244 Removed old ldif files, added TOF, MCH ldif files. Added some options in
245 AliShuttleConfig::Print. Added in Ali Shuttle: SetShuttleTempDir and
246 SetShuttleLogDir
247
248 Revision 1.25  2007/01/15 19:13:52  acolla
249 Moved some AliInfo to AliDebug in SendMail function
250
251 Revision 1.21  2006/12/07 08:51:26  jgrosseo
252 update (alberto):
253 table, db names in ldap configuration
254 added GRP preprocessor
255 DCS data can also be retrieved by data point
256
257 Revision 1.20  2006/11/16 16:16:48  jgrosseo
258 introducing strict run ordering flag
259 removed giving preprocessor name to preprocessor, they have to know their name themselves ;-)
260
261 Revision 1.19  2006/11/06 14:23:04  jgrosseo
262 major update (Alberto)
263 o) reading of run parameters from the logbook
264 o) online offline naming conversion
265 o) standalone DCSclient package
266
267 Revision 1.18  2006/10/20 15:22:59  jgrosseo
268 o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
269 o) Merging Collect, CollectAll, CollectNew function
270 o) Removing implementation of empty copy constructors (declaration still there!)
271
272 Revision 1.17  2006/10/05 16:20:55  jgrosseo
273 adapting to new CDB classes
274
275 Revision 1.16  2006/10/05 15:46:26  jgrosseo
276 applying to the new interface
277
278 Revision 1.15  2006/10/02 16:38:39  jgrosseo
279 update (alberto):
280 fixed memory leaks
281 storing of objects that failed to be stored to the grid before
282 interfacing of shuttle status table in daq system
283
284 Revision 1.14  2006/08/29 09:16:05  jgrosseo
285 small update
286
287 Revision 1.13  2006/08/15 10:50:00  jgrosseo
288 effc++ corrections (alberto)
289
290 Revision 1.12  2006/08/08 14:19:29  jgrosseo
291 Update to shuttle classes (Alberto)
292
293 - Possibility to set the full object's path in the Preprocessor's and
294 Shuttle's  Store functions
295 - Possibility to extend the object's run validity in the same classes
296 ("startValidity" and "validityInfinite" parameters)
297 - Implementation of the StoreReferenceData function to store reference
298 data in a dedicated CDB storage.
299
300 Revision 1.11  2006/07/21 07:37:20  jgrosseo
301 last run is stored after each run
302
303 Revision 1.10  2006/07/20 09:54:40  jgrosseo
304 introducing status management: The processing per subdetector is divided into several steps,
305 after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
306 can keep track of the number of failures and skips further processing after a certain threshold is
307 exceeded. These thresholds can be configured in LDAP.
308
309 Revision 1.9  2006/07/19 10:09:55  jgrosseo
310 new configuration, accesst to DAQ FES (Alberto)
311
312 Revision 1.8  2006/07/11 12:44:36  jgrosseo
313 adding parameters for extended validity range of data produced by preprocessor
314
315 Revision 1.7  2006/07/10 14:37:09  jgrosseo
316 small fix + todo comment
317
318 Revision 1.6  2006/07/10 13:01:41  jgrosseo
319 enhanced storing of last sucessfully processed run (alberto)
320
321 Revision 1.5  2006/07/04 14:59:57  jgrosseo
322 revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
323
324 Revision 1.4  2006/06/12 09:11:16  jgrosseo
325 coding conventions (Alberto)
326
327 Revision 1.3  2006/06/06 14:26:40  jgrosseo
328 o) removed files that were moved to STEER
329 o) shuttle updated to follow the new interface (Alberto)
330
331 Revision 1.2  2006/03/07 07:52:34  hristov
332 New version (B.Yordanov)
333
334 Revision 1.6  2005/11/19 17:19:14  byordano
335 RetrieveDATEEntries and RetrieveConditionsData added
336
337 Revision 1.5  2005/11/19 11:09:27  byordano
338 AliShuttle declaration added
339
340 Revision 1.4  2005/11/17 17:47:34  byordano
341 TList changed to TObjArray
342
343 Revision 1.3  2005/11/17 14:43:23  byordano
344 import to local CVS
345
346 Revision 1.1.1.1  2005/10/28 07:33:58  hristov
347 Initial import as subdirectory in AliRoot
348
349 Revision 1.2  2005/09/13 08:41:15  byordano
350 default startTime endTime added
351
352 Revision 1.4  2005/08/30 09:13:02  byordano
353 some docs added
354
355 Revision 1.3  2005/08/29 21:15:47  byordano
356 some docs added
357
358 */
359
360 //
// This class is the main manager of the Shuttle.
// It organizes the data retrieval from DCS and calls the
// interface methods of AliPreprocessor.
// For every detector in the configuration (see AliShuttleConfig),
// data for its set of aliases is retrieved. If there is a registered
// AliPreprocessor for this detector then it will be used
// according to the schema (see AliPreprocessor).
// If there is no registered AliPreprocessor then the retrieved
// data is stored automatically to the underlying AliCDBStorage.
// The alias name is used for detSpec.
371 //
372
373 #include "AliShuttle.h"
374
375 #include "AliCDBManager.h"
376 #include "AliCDBStorage.h"
377 #include "AliCDBId.h"
378 #include "AliCDBRunRange.h"
379 #include "AliCDBPath.h"
380 #include "AliCDBEntry.h"
381 #include "AliShuttleConfig.h"
382 #include "DCSClient/AliDCSClient.h"
383 #include "AliLog.h"
384 #include "AliPreprocessor.h"
385 #include "AliShuttleStatus.h"
386 #include "AliShuttleLogbookEntry.h"
387
388 #include <TSystem.h>
389 #include <TObject.h>
390 #include <TString.h>
391 #include <TTimeStamp.h>
392 #include <TObjString.h>
393 #include <TSQLServer.h>
394 #include <TSQLResult.h>
395 #include <TSQLRow.h>
396 #include <TMutex.h>
397 #include <TSystemDirectory.h>
398 #include <TSystemFile.h>
399 #include <TFile.h>
400 #include <TGrid.h>
401 #include <TGridResult.h>
402
403 #include <TMonaLisaWriter.h>
404
405 #include <fstream>
406
407 #include <sys/types.h>
408 #include <sys/wait.h>
409
410 ClassImp(AliShuttle)
411
412 //______________________________________________________________________________________________
413 AliShuttle::AliShuttle(const AliShuttleConfig* config,
414                 UInt_t timeout, Int_t retries):
415 fConfig(config),
416 fTimeout(timeout), fRetries(retries),
417 fPreprocessorMap(),
418 fLogbookEntry(0),
419 fCurrentDetector(),
420 fStatusEntry(0),
421 fMonitoringMutex(0),
422 fLastActionTime(0),
423 fLastAction(),
424 fMonaLisa(0),
425 fTestMode(kNone),
426 fReadTestMode(kFALSE),
427 fOutputRedirected(kFALSE)
428 {
429         //
430         // config: AliShuttleConfig used
431         // timeout: timeout used for AliDCSClient connection
432         // retries: the number of retries in case of connection error.
433         //
434
435         if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
436         for(int iSys=0;iSys<4;iSys++) {
437                 fServer[iSys]=0;
438                 if (iSys < 3)
439                         fFXSlist[iSys].SetOwner(kTRUE);
440         }
441         fPreprocessorMap.SetOwner(kTRUE);
442
443         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
444                 fFirstUnprocessed[iDet] = kFALSE;
445
446         fMonitoringMutex = new TMutex();
447 }
448
449 //______________________________________________________________________________________________
450 AliShuttle::~AliShuttle()
451 {
452         //
453         // destructor
454         //
455
456         fPreprocessorMap.DeleteAll();
457         for(int iSys=0;iSys<4;iSys++)
458                 if(fServer[iSys]) {
459                         fServer[iSys]->Close();
460                         delete fServer[iSys];
461                         fServer[iSys] = 0;
462                 }
463
464         if (fStatusEntry){
465                 delete fStatusEntry;
466                 fStatusEntry = 0;
467         }
468         
469         if (fMonitoringMutex) 
470         {
471                 delete fMonitoringMutex;
472                 fMonitoringMutex = 0;
473         }
474 }
475
476 //______________________________________________________________________________________________
477 void AliShuttle::RegisterPreprocessor(AliPreprocessor* preprocessor)
478 {
479         //
480         // Registers new AliPreprocessor.
481         // It uses GetName() for indentificator of the pre processor.
482         // The pre processor is registered it there isn't any other
483         // with the same identificator (GetName()).
484         //
485
486         const char* detName = preprocessor->GetName();
487         if(GetDetPos(detName) < 0)
488                 AliFatal(Form("********** !!!!! Invalid detector name: %s !!!!! **********", detName));
489
490         if (fPreprocessorMap.GetValue(detName)) {
491                 AliWarning(Form("AliPreprocessor %s is already registered!", detName));
492                 return;
493         }
494
495         fPreprocessorMap.Add(new TObjString(detName), preprocessor);
496 }
497 //______________________________________________________________________________________________
498 Bool_t AliShuttle::Store(const AliCDBPath& path, TObject* object,
499                 AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite)
500 {
501         // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for
502         // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored
503         // using this function. Use StoreReferenceData instead!
504         // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
505         // finishes the data are transferred to the main storage (Grid).
506
507         return StoreLocally(fgkLocalCDB, path, object, metaData, validityStart, validityInfinite);
508 }
509
510 //______________________________________________________________________________________________
511 Bool_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData)
512 {
513         // Stores a CDB object in the storage for reference data. This objects will not be available during
514         // offline reconstrunction. Use this function for reference data only!
515         // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
516         // finishes the data are transferred to the main storage (Grid).
517
518         return StoreLocally(fgkLocalRefStorage, path, object, metaData);
519 }
520
521 //______________________________________________________________________________________________
522 Bool_t AliShuttle::StoreLocally(const TString& localUri,
523                         const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
524                         Int_t validityStart, Bool_t validityInfinite)
525 {
526         // Store object temporarily in local storage. Parameters are passed by Store and StoreReferenceData functions.
527         // when the preprocessor finishes the data are transferred to the main storage (Grid).
528         // The parameters are:
529         //   1) Uri of the backup storage (Local)
530         //   2) the object's path.
531         //   3) the object to be stored
532         //   4) the metaData to be associated with the object
533         //   5) the validity start run number w.r.t. the current run,
534         //      if the data is valid only for this run leave the default 0
535         //   6) specifies if the calibration data is valid for infinity (this means until updated),
536         //      typical for calibration runs, the default is kFALSE
537         //
538         // returns 0 if fail, 1 otherwise
539
540         if (fTestMode & kErrorStorage)
541         {
542                 Log(fCurrentDetector, "StoreLocally - In TESTMODE - Simulating error while storing locally");
543                 return kFALSE;
544         }
545         
546         const char* cdbType = (localUri == fgkLocalCDB) ? "CDB" : "Reference";
547
548         Int_t firstRun = GetCurrentRun() - validityStart;
549         if(firstRun < 0) {
550                 AliWarning("First valid run happens to be less than 0! Setting it to 0.");
551                 firstRun=0;
552         }
553
554         Int_t lastRun = -1;
555         if(validityInfinite) {
556                 lastRun = AliCDBRunRange::Infinity();
557         } else {
558                 lastRun = GetCurrentRun();
559         }
560
561         // Version is set to current run, it will be used later to transfer data to Grid
562         AliCDBId id(path, firstRun, lastRun, GetCurrentRun(), -1);
563
564         if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){
565                 TObjString runUsed = Form("%d", GetCurrentRun());
566                 metaData->SetProperty("RunUsed(TObjString)", runUsed.Clone());
567         }
568
569         Bool_t result = kFALSE;
570
571         if (!(AliCDBManager::Instance()->GetStorage(localUri))) {
572                 Log("SHUTTLE", Form("StoreLocally - Cannot activate local %s storage", cdbType));
573         } else {
574                 result = AliCDBManager::Instance()->GetStorage(localUri)
575                                         ->Put(object, id, metaData);
576         }
577
578         if(!result) {
579
580                 Log(fCurrentDetector, Form("StoreLocally - Can't store object <%s>!", id.ToString().Data()));
581         }
582
583         return result;
584 }
585
586 //______________________________________________________________________________________________
587 Bool_t AliShuttle::StoreOCDB()
588 {
589         //
590         // Called when preprocessor ends successfully or when previous storage attempt failed (kStoreError status)
591         // Calls underlying StoreOCDB(const char*) function twice, for OCDB and Reference storage.
592         // Then calls StoreRefFilesToGrid to store reference files. 
593         //
594         
595         if (fTestMode & kErrorGrid)
596         {
597                 Log("SHUTTLE", "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
598                 Log(fCurrentDetector, "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
599                 return kFALSE;
600         }
601         
602         Log("SHUTTLE","StoreOCDB - Storing OCDB data ...");
603         Bool_t resultCDB = StoreOCDB(fgkMainCDB);
604
605         Log("SHUTTLE","StoreOCDB - Storing reference data ...");
606         Bool_t resultRef = StoreOCDB(fgkMainRefStorage);
607         
608         Log("SHUTTLE","StoreOCDB - Storing reference files ...");
609         Bool_t resultRefFiles = CopyFilesToGrid("reference");
610         
611         Bool_t resultMetadata = kTRUE;
612         if(fCurrentDetector == "GRP") 
613         {
614                 Log("StoreOCDB - SHUTTLE","Storing Run Metadata file ...");
615                 resultMetadata = CopyFilesToGrid("metadata");
616         }
617         
618         return resultCDB && resultRef && resultRefFiles && resultMetadata;
619 }
620
621 //______________________________________________________________________________________________
622 Bool_t AliShuttle::StoreOCDB(const TString& gridURI)
623 {
624         //
625         // Called by StoreOCDB(), performs actual storage to the main OCDB and reference storages (Grid)
626         //
627
628         TObjArray* gridIds=0;
629
630         Bool_t result = kTRUE;
631
632         const char* type = 0;
633         TString localURI;
634         if(gridURI == fgkMainCDB) {
635                 type = "OCDB";
636                 localURI = fgkLocalCDB;
637         } else if(gridURI == fgkMainRefStorage) {
638                 type = "reference";
639                 localURI = fgkLocalRefStorage;
640         } else {
641                 AliError(Form("Invalid storage URI: %s", gridURI.Data()));
642                 return kFALSE;
643         }
644
645         AliCDBManager* man = AliCDBManager::Instance();
646
647         AliCDBStorage *gridSto = man->GetStorage(gridURI);
648         if(!gridSto) {
649                 Log("SHUTTLE",
650                         Form("StoreOCDB - cannot activate main %s storage", type));
651                 return kFALSE;
652         }
653
654         gridIds = gridSto->GetQueryCDBList();
655
656         // get objects previously stored in local CDB
657         AliCDBStorage *localSto = man->GetStorage(localURI);
658         if(!localSto) {
659                 Log("SHUTTLE",
660                         Form("StoreOCDB - cannot activate local %s storage", type));
661                 return kFALSE;
662         }
663         AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*");
664         // Local objects were stored with current run as Grid version!
665         TList* localEntries = localSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun());
666         localEntries->SetOwner(1);
667
668         // loop on local stored objects
669         TIter localIter(localEntries);
670         AliCDBEntry *aLocEntry = 0;
671         while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){
672                 aLocEntry->SetOwner(1);
673                 AliCDBId aLocId = aLocEntry->GetId();
674                 aLocEntry->SetVersion(-1);
675                 aLocEntry->SetSubVersion(-1);
676
677                 // If local object is valid up to infinity we store it only if it is
678                 // the first unprocessed run!
679                 if (aLocId.GetLastRun() == AliCDBRunRange::Infinity() &&
680                         !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
681                 {
682                         Log("SHUTTLE", Form("StoreOCDB - %s: object %s has validity infinite but "
683                                                 "there are previous unprocessed runs!",
684                                                 fCurrentDetector.Data(), aLocId.GetPath().Data()));
685                         result = kFALSE;
686                         continue;
687                 }
688
689                 // loop on Grid valid Id's
690                 Bool_t store = kTRUE;
691                 TIter gridIter(gridIds);
692                 AliCDBId* aGridId = 0;
693                 while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
694                         if(aGridId->GetPath() != aLocId.GetPath()) continue;
695                         // skip all objects valid up to infinity
696                         if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
697                         // if we get here, it means there's already some more recent object stored on Grid!
698                         store = kFALSE;
699                         break;
700                 }
701
702                 // If we get here, the file can be stored!
703                 Bool_t storeOk = gridSto->Put(aLocEntry);
704                 if(!store || storeOk){
705
706                         if (!store)
707                         {
708                                 Log(fCurrentDetector.Data(),
709                                         Form("StoreOCDB - A more recent object already exists in %s storage: <%s>",
710                                                 type, aGridId->ToString().Data()));
711                         } else {
712                                 Log("SHUTTLE",
713                                         Form("StoreOCDB - Object <%s> successfully put into %s storage",
714                                                 aLocId.ToString().Data(), type));
715                                 Log(fCurrentDetector.Data(),
716                                         Form("StoreOCDB - Object <%s> successfully put into %s storage",
717                                                 aLocId.ToString().Data(), type));
718                         }
719
720                         // removing local filename...
721                         TString filename;
722                         localSto->IdToFilename(aLocId, filename);
723                         Log("SHUTTLE", Form("StoreOCDB - Removing local file %s", filename.Data()));
724                         RemoveFile(filename.Data());
725                         continue;
726                 } else  {
727                         Log("SHUTTLE",
728                                 Form("StoreOCDB - Grid %s storage of object <%s> failed",
729                                         type, aLocId.ToString().Data()));
730                         Log(fCurrentDetector.Data(),
731                                 Form("StoreOCDB - Grid %s storage of object <%s> failed",
732                                         type, aLocId.ToString().Data()));
733                         result = kFALSE;
734                 }
735         }
736         localEntries->Clear();
737
738         return result;
739 }
740
741 //______________________________________________________________________________________________
742 Bool_t AliShuttle::CleanReferenceStorage(const char* detector)
743 {
744         // clears the directory used to store reference files of a given subdetector
745   
746         AliCDBManager* man = AliCDBManager::Instance();
747         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
748         TString localBaseFolder = sto->GetBaseFolder();
749
750         TString targetDir = GetRefFilePrefix(localBaseFolder.Data(), detector);
751         
752         Log("SHUTTLE", Form("CleanReferenceStorage - Cleaning %s", targetDir.Data()));
753
754         TString begin;
755         begin.Form("%d_", GetCurrentRun());
756         
757         TSystemDirectory* baseDir = new TSystemDirectory("/", targetDir);
758         if (!baseDir)
759                 return kTRUE;
760                 
761         TList* dirList = baseDir->GetListOfFiles();
762         delete baseDir;
763         
764         if (!dirList) return kTRUE;
765                         
766         if (dirList->GetEntries() < 3) 
767         {
768                 delete dirList;
769                 return kTRUE;
770         }
771                                 
772         Int_t nDirs = 0, nDel = 0;
773         TIter dirIter(dirList);
774         TSystemFile* entry = 0;
775
776         Bool_t success = kTRUE;
777         
778         while ((entry = dynamic_cast<TSystemFile*> (dirIter.Next())))
779         {                                       
780                 if (entry->IsDirectory())
781                         continue;
782                 
783                 TString fileName(entry->GetName());
784                 if (!fileName.BeginsWith(begin))
785                         continue;
786                         
787                 nDirs++;
788                                                 
789                 // delete file
790                 Int_t result = gSystem->Unlink(fileName.Data());
791                 
792                 if (result)
793                 {
794                         Log("SHUTTLE", Form("CleanReferenceStorage - Could not delete file %s!", fileName.Data()));
795                         success = kFALSE;
796                 } else {
797                         nDel++;
798                 }
799         }
800
801         if(nDirs > 0)
802                 Log("SHUTTLE", Form("CleanReferenceStorage - %d (over %d) reference files in folder %s were deleted.", 
803                         nDel, nDirs, targetDir.Data()));
804
805                 
806         delete dirList;
807         return success;
808
809
810
811
812
813
814   Int_t result = gSystem->GetPathInfo(targetDir, 0, (Long64_t*) 0, 0, 0);
815   if (result == 0)
816   {
817     // delete directory
818     result = gSystem->Exec(Form("rm -rf %s", targetDir.Data()));
819     if (result != 0)
820     {  
821       Log("SHUTTLE", Form("CleanReferenceStorage - Could not clean directory %s", targetDir.Data()));
822       return kFALSE;
823     }
824   }
825
826   result = gSystem->mkdir(targetDir, kTRUE);
827   if (result != 0)
828   {
829     Log("SHUTTLE", Form("CleanReferenceStorage - Error creating base directory %s", targetDir.Data()));
830     return kFALSE;
831   }
832         
833   return kTRUE;
834 }
835
836 //______________________________________________________________________________________________
837 Bool_t AliShuttle::StoreReferenceFile(const char* detector, const char* localFile, const char* gridFileName)
838 {
839         //
840         // Stores reference file directly (without opening it). This function stores the file locally.
841         //
842         // The file is stored under the following location: 
843         // <base folder of local reference storage>/<DET>/<RUN#>_<gridFileName>
844         // where <gridFileName> is the second parameter given to the function
845         // 
846         
847         if (fTestMode & kErrorStorage)
848         {
849                 Log(fCurrentDetector, "StoreReferenceFile - In TESTMODE - Simulating error while storing locally");
850                 return kFALSE;
851         }
852         
853         AliCDBManager* man = AliCDBManager::Instance();
854         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
855         
856         TString localBaseFolder = sto->GetBaseFolder();
857         
858         TString target = GetRefFilePrefix(localBaseFolder.Data(), detector);    
859         target.Append(Form("/%d_%s", GetCurrentRun(), gridFileName));
860         
861         return CopyFileLocally(localFile, target);
862 }
863
864 //______________________________________________________________________________________________
865 Bool_t AliShuttle::StoreRunMetadataFile(const char* localFile, const char* gridFileName)
866 {
867         //
868         // Stores Run metadata file to the Grid, in the run folder
869         //
870         // Only GRP can call this function.
871         
872         if (fTestMode & kErrorStorage)
873         {
874                 Log(fCurrentDetector, "StoreRunMetaDataFile - In TESTMODE - Simulating error while storing locally");
875                 return kFALSE;
876         }
877         
878         AliCDBManager* man = AliCDBManager::Instance();
879         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
880         
881         TString localBaseFolder = sto->GetBaseFolder();
882         
883         // Build Run level folder
884         // folder = /alice/data/year/lhcPeriod/runNb/raw
885         
886                 
887         TString lhcPeriod = GetLHCPeriod();     
888         if (lhcPeriod.Length() == 0) 
889         {
890                 Log("SHUTTLE","StoreRunMetaDataFile - LHCPeriod not found in logbook!");
891                 return 0;
892         }
893         
894         // TODO partitions with one detector only write data into LHCperiod_DET
895         TString partition = GetRunParameter("detector");
896         
897         if (partition.Length() > 0 && partition != "ALICE")
898         {
899                 lhcPeriod.Append(Form("_%s", partition.Data()));
900                 Log(fCurrentDetector, Form("Run data tags merged file will be written in %s", 
901                                 lhcPeriod.Data()));
902         }
903                 
904         TString target = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/raw/%s", 
905                                 localBaseFolder.Data(), GetCurrentYear(), 
906                                 lhcPeriod.Data(), GetCurrentRun(), gridFileName);
907                                         
908         return CopyFileLocally(localFile, target);
909 }
910
911 //______________________________________________________________________________________________
912 Bool_t AliShuttle::CopyFileLocally(const char* localFile, const TString& target)
913 {
914         //
915         // Stores file locally. Called by StoreReferenceFile and StoreRunMetadataFile
916         // Files are temporarily stored in the local reference storage. When the preprocessor 
917         // finishes, the Shuttle calls CopyFilesToGrid to transfer the files to AliEn 
918         // (in reference or run level folders)
919         //
920         
921         TString targetDir(target(0, target.Last('/')));
922         
923         //try to open base dir folder, if it does not exist
924         void* dir = gSystem->OpenDirectory(targetDir.Data());
925         if (dir == NULL) {
926                 if (gSystem->mkdir(targetDir.Data(), kTRUE)) {
927                         Log("SHUTTLE", Form("CopyFileLocally - Can't open directory <%s>", targetDir.Data()));
928                         return kFALSE;
929                 }
930
931         } else {
932                 gSystem->FreeDirectory(dir);
933         }
934         
935         Int_t result = 0;
936         
937         result = gSystem->GetPathInfo(localFile, 0, (Long64_t*) 0, 0, 0);
938         if (result)
939         {
940                 Log("SHUTTLE", Form("CopyFileLocally - %s does not exist", localFile));
941                 return kFALSE;
942         }
943
944         result = gSystem->GetPathInfo(target, 0, (Long64_t*) 0, 0, 0);
945         if (!result)
946         {
947                 Log("SHUTTLE", Form("CopyFileLocally - target file %s already exist, removing...", target.Data()));
948                 if (gSystem->Unlink(target.Data()))
949                 {
950                         Log("SHUTTLE", Form("CopyFileLocally - Could not remove existing target file %s!", target.Data()));
951                         return kFALSE;
952                 }
953         }       
954         
955         result = gSystem->CopyFile(localFile, target);
956
957         if (result == 0)
958         {
959                 Log("SHUTTLE", Form("CopyFileLocally - File %s stored locally to %s", localFile, target.Data()));
960                 return kTRUE;
961         }
962         else
963         {
964                 Log("SHUTTLE", Form("CopyFileLocally - Could not store file %s to %s! Error code = %d", 
965                                 localFile, target.Data(), result));
966                 return kFALSE;
967         }       
968
969
970
971 }
972
973 //______________________________________________________________________________________________
974 Bool_t AliShuttle::CopyFilesToGrid(const char* type)
975 {
976         //
977         // Transfers local files to the Grid. Local files can be reference files 
978         // or run metadata file (from GRP only).
979         //
980         // According to the type (ref, metadata) the files are stored under the following location: 
981         // ref --> <base folder of reference storage>/<DET>/<RUN#>_<gridFileName>
982         // metadata --> <run data folder>/<MetadataFileName>
983         //
984                 
985         AliCDBManager* man = AliCDBManager::Instance();
986         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
987         if (!sto)
988                 return kFALSE;
989         TString localBaseFolder = sto->GetBaseFolder();
990         
991         TString dir;
992         TString alienDir;
993         TString begin;
994         
995         if (strcmp(type, "reference") == 0) 
996         {
997                 dir = GetRefFilePrefix(localBaseFolder.Data(), fCurrentDetector.Data());
998                 AliCDBStorage* gridSto = man->GetStorage(fgkMainRefStorage);
999                 if (!gridSto)
1000                         return kFALSE;
1001                 TString gridBaseFolder = gridSto->GetBaseFolder();
1002                 alienDir = GetRefFilePrefix(gridBaseFolder.Data(), fCurrentDetector.Data());
1003                 begin = Form("%d_", GetCurrentRun());
1004         } 
1005         else if (strcmp(type, "metadata") == 0)
1006         {
1007                         
1008                 TString lhcPeriod = GetLHCPeriod();
1009         
1010                 if (lhcPeriod.Length() == 0) 
1011                 {
1012                         Log("SHUTTLE","CopyFilesToGrid - LHCPeriod not found in logbook!");
1013                         return 0;
1014                 }
1015                 
1016                 // TODO partitions with one detector only write data into LHCperiod_DET
1017                 TString partition = GetRunParameter("detector");
1018         
1019                 if (partition.Length() > 0 && partition != "ALICE")
1020                 {
1021                         lhcPeriod.Append(Form("_%s", partition.Data()));
1022                 }
1023                 
1024                 dir = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/raw", 
1025                                 localBaseFolder.Data(), GetCurrentYear(), 
1026                                 lhcPeriod.Data(), GetCurrentRun());
1027                 alienDir = dir(dir.Index("/alice/data/"), dir.Length());
1028                 
1029                 begin = "";
1030         }
1031         else 
1032         {
1033                 Log("SHUTTLE", "CopyFilesToGrid - Unexpected: type label must be reference or metadata!");
1034                 return kFALSE;
1035         }
1036                 
1037         TSystemDirectory* baseDir = new TSystemDirectory("/", dir);
1038         if (!baseDir)
1039                 return kTRUE;
1040                 
1041         TList* dirList = baseDir->GetListOfFiles();
1042         delete baseDir;
1043         
1044         if (!dirList) return kTRUE;
1045                 
1046         if (dirList->GetEntries() < 3) 
1047         {
1048                 delete dirList;
1049                 return kTRUE;
1050         }
1051                         
1052         if (!gGrid)
1053         { 
1054                 Log("SHUTTLE", "CopyFilesToGrid - Connection to Grid failed: Cannot continue!");
1055                 delete dirList;
1056                 return kFALSE;
1057         }
1058         
1059         Int_t nDirs = 0, nTransfer = 0;
1060         TIter dirIter(dirList);
1061         TSystemFile* entry = 0;
1062
1063         Bool_t success = kTRUE;
1064         Bool_t first = kTRUE;
1065         
1066         while ((entry = dynamic_cast<TSystemFile*> (dirIter.Next())))
1067         {                       
1068                 if (entry->IsDirectory())
1069                         continue;
1070                         
1071                 TString fileName(entry->GetName());
1072                 if (!fileName.BeginsWith(begin))
1073                         continue;
1074                         
1075                 nDirs++;
1076                         
1077                 if (first)
1078                 {
1079                         first = kFALSE;
1080                         // check that folder exists, otherwise create it
1081                         TGridResult* result = gGrid->Ls(alienDir.Data(), "a");
1082                         
1083                         if (!result)
1084                         {
1085                                 delete dirList;
1086                                 return kFALSE;
1087                         }
1088                         
1089                         if (!result->GetFileName(1)) // TODO: It looks like element 0 is always 0!!
1090                         {
1091                                 // TODO It does not work currently! Bug in TAliEn::Mkdir
1092                                 // TODO Manually fixed in local root v5-16-00
1093                                 if (!gGrid->Mkdir(alienDir.Data(),"-p",0))
1094                                 {
1095                                         Log("SHUTTLE", Form("CopyFilesToGrid - Cannot create directory %s",
1096                                                         alienDir.Data()));
1097                                         delete dirList;
1098                                         return kFALSE;
1099                                 } else {
1100                                         Log("SHUTTLE",Form("CopyFilesToGrid - Folder %s created", alienDir.Data()));
1101                                 }
1102                                 
1103                         } else {
1104                                         Log("SHUTTLE",Form("CopyFilesToGrid - Folder %s found", alienDir.Data()));
1105                         }
1106                 }
1107                         
1108                 TString fullLocalPath;
1109                 fullLocalPath.Form("%s/%s", dir.Data(), fileName.Data());
1110                 
1111                 TString fullGridPath;
1112                 fullGridPath.Form("alien://%s/%s", alienDir.Data(), fileName.Data());
1113
1114                 Bool_t result = TFile::Cp(fullLocalPath, fullGridPath);
1115                 
1116                 if (result)
1117                 {
1118                         Log("SHUTTLE", Form("CopyFilesToGrid - Copying local file %s to %s succeeded!", 
1119                                                 fullLocalPath.Data(), fullGridPath.Data()));
1120                         RemoveFile(fullLocalPath);
1121                         nTransfer++;
1122                 }
1123                 else
1124                 {
1125                         Log("SHUTTLE", Form("CopyFilesToGrid - Copying local file %s to %s FAILED!", 
1126                                                 fullLocalPath.Data(), fullGridPath.Data()));
1127                         success = kFALSE;
1128                 }
1129         }
1130
1131         Log("SHUTTLE", Form("CopyFilesToGrid - %d (over %d) files in folder %s copied to Grid.", 
1132                                                 nTransfer, nDirs, dir.Data()));
1133
1134                 
1135         delete dirList;
1136         return success;
1137 }
1138
1139 //______________________________________________________________________________________________
1140 const char* AliShuttle::GetRefFilePrefix(const char* base, const char* detector)
1141 {
1142         //
1143         // Get folder name of reference files 
1144         //
1145
1146         TString offDetStr(GetOfflineDetName(detector));
1147         TString dir;
1148         if (offDetStr == "ITS" || offDetStr == "MUON" || offDetStr == "PHOS")
1149         {
1150                 dir.Form("%s/%s/%s", base, offDetStr.Data(), detector);
1151         } else {
1152                 dir.Form("%s/%s", base, offDetStr.Data());
1153         }
1154         
1155         return dir.Data();
1156         
1157
1158 }
1159
1160 //______________________________________________________________________________________________
1161 void AliShuttle::CleanLocalStorage(const TString& uri)
1162 {
1163         //
1164         // Called in case the preprocessor is declared failed. Remove remaining objects from the local storages.
1165         //
1166
1167         const char* type = 0;
1168         if(uri == fgkLocalCDB) {
1169                 type = "OCDB";
1170         } else if(uri == fgkLocalRefStorage) {
1171                 type = "Reference";
1172         } else {
1173                 AliError(Form("Invalid storage URI: %s", uri.Data()));
1174                 return;
1175         }
1176
1177         AliCDBManager* man = AliCDBManager::Instance();
1178
1179         // open local storage
1180         AliCDBStorage *localSto = man->GetStorage(uri);
1181         if(!localSto) {
1182                 Log("SHUTTLE",
1183                         Form("CleanLocalStorage - cannot activate local %s storage", type));
1184                 return;
1185         }
1186
1187         TString filename(Form("%s/%s/*/Run*_v%d_s*.root",
1188                 localSto->GetBaseFolder().Data(), GetOfflineDetName(fCurrentDetector.Data()), GetCurrentRun()));
1189
1190         AliDebug(2, Form("filename = %s", filename.Data()));
1191
1192         Log("SHUTTLE", Form("Removing remaining local files for run %d and detector %s ...",
1193                 GetCurrentRun(), fCurrentDetector.Data()));
1194
1195         RemoveFile(filename.Data());
1196
1197 }
1198
1199 //______________________________________________________________________________________________
1200 void AliShuttle::RemoveFile(const char* filename)
1201 {
1202         //
1203         // removes local file
1204         //
1205
1206         TString command(Form("rm -f %s", filename));
1207
1208         Int_t result = gSystem->Exec(command.Data());
1209         if(result != 0)
1210         {
1211                 Log("SHUTTLE", Form("RemoveFile - %s: Cannot remove file %s!",
1212                         fCurrentDetector.Data(), filename));
1213         }
1214 }
1215
1216 //______________________________________________________________________________________________
1217 AliShuttleStatus* AliShuttle::ReadShuttleStatus()
1218 {
1219         //
1220         // Reads the AliShuttleStatus from the CDB
1221         //
1222
1223         if (fStatusEntry){
1224                 delete fStatusEntry;
1225                 fStatusEntry = 0;
1226         }
1227
1228         fStatusEntry = AliCDBManager::Instance()->GetStorage(GetLocalCDB())
1229                 ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
1230
1231         if (!fStatusEntry) return 0;
1232         fStatusEntry->SetOwner(1);
1233
1234         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1235         if (!status) {
1236                 AliError("Invalid object stored to CDB!");
1237                 return 0;
1238         }
1239
1240         return status;
1241 }
1242
1243 //______________________________________________________________________________________________
1244 Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status)
1245 {
1246         //
1247         // writes the status for one subdetector
1248         //
1249
1250         if (fStatusEntry){
1251                 delete fStatusEntry;
1252                 fStatusEntry = 0;
1253         }
1254
1255         Int_t run = GetCurrentRun();
1256
1257         AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
1258
1259         fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
1260         fStatusEntry->SetOwner(1);
1261
1262         UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
1263
1264         if (!result) {
1265                 Log("SHUTTLE", Form("WriteShuttleStatus - Failed for %s, run %d",
1266                                                 fCurrentDetector.Data(), run));
1267                 return kFALSE;
1268         }
1269         
1270         SendMLInfo();
1271
1272         return kTRUE;
1273 }
1274
1275 //______________________________________________________________________________________________
1276 void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount)
1277 {
1278         //
1279         // changes the AliShuttleStatus for the given detector and run to the given status
1280         //
1281
1282         if (!fStatusEntry){
1283                 AliError("UNEXPECTED: fStatusEntry empty");
1284                 return;
1285         }
1286
1287         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1288
1289         if (!status){
1290                 Log("SHUTTLE", "UpdateShuttleStatus - UNEXPECTED: status could not be read from current CDB entry");
1291                 return;
1292         }
1293
1294         TString actionStr = Form("UpdateShuttleStatus - %s: Changing state from %s to %s",
1295                                 fCurrentDetector.Data(),
1296                                 status->GetStatusName(),
1297                                 status->GetStatusName(newStatus));
1298         Log("SHUTTLE", actionStr);
1299         SetLastAction(actionStr);
1300
1301         status->SetStatus(newStatus);
1302         if (increaseCount) status->IncreaseCount();
1303
1304         AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
1305
1306         SendMLInfo();
1307 }
1308
1309 //______________________________________________________________________________________________
1310 void AliShuttle::SendMLInfo()
1311 {
1312         //
1313         // sends ML information about the current status of the current detector being processed
1314         //
1315         
1316         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1317         
1318         if (!status){
1319                 Log("SHUTTLE", "SendMLInfo - UNEXPECTED: status could not be read from current CDB entry");
1320                 return;
1321         }
1322         
1323         TMonaLisaText  mlStatus(Form("%s_status", fCurrentDetector.Data()), status->GetStatusName());
1324         TMonaLisaValue mlRetryCount(Form("%s_count", fCurrentDetector.Data()), status->GetCount());
1325
1326         TList mlList;
1327         mlList.Add(&mlStatus);
1328         mlList.Add(&mlRetryCount);
1329
1330         TString mlID;
1331         mlID.Form("%d", GetCurrentRun());
1332         fMonaLisa->SendParameters(&mlList, mlID);
1333 }
1334
1335 //______________________________________________________________________________________________
1336 Bool_t AliShuttle::ContinueProcessing()
1337 {
1338         // this function reads the AliShuttleStatus information from CDB and
1339         // checks if the processing should be continued
1340         // if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
1341
1342         if (!fConfig->HostProcessDetector(fCurrentDetector)) return kFALSE;
1343
1344         AliPreprocessor* aPreprocessor =
1345                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
1346         if (!aPreprocessor)
1347         {
1348                 Log("SHUTTLE", Form("ContinueProcessing - %s: no preprocessor registered", fCurrentDetector.Data()));
1349                 return kFALSE;
1350         }
1351
1352         AliShuttleLogbookEntry::Status entryStatus =
1353                 fLogbookEntry->GetDetectorStatus(fCurrentDetector);
1354
1355         if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
1356                 Log("SHUTTLE", Form("ContinueProcessing - %s is %s",
1357                                 fCurrentDetector.Data(),
1358                                 fLogbookEntry->GetDetectorStatusName(entryStatus)));
1359                 return kFALSE;
1360         }
1361
1362         // if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state
1363
1364         // check if current run is first unprocessed run for current detector
1365         if (fConfig->StrictRunOrder(fCurrentDetector) &&
1366                 !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
1367         {
1368                 if (fTestMode == kNone)
1369                 {
1370                         Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering"
1371                                         " but this is not the first unprocessed run!"));
1372                         return kFALSE;
1373                 }
1374                 else
1375                 {
1376                         Log("SHUTTLE", Form("ContinueProcessing - In TESTMODE - "
1377                                         "Although %s requires strict run ordering "
1378                                         "and this is not the first unprocessed run, "
1379                                         "the SHUTTLE continues"));
1380                 }
1381         }
1382
1383         AliShuttleStatus* status = ReadShuttleStatus();
1384         if (!status) {
1385                 // first time
1386                 Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
1387                                 fCurrentDetector.Data()));
1388                 status = new AliShuttleStatus(AliShuttleStatus::kStarted);
1389                 return WriteShuttleStatus(status);
1390         }
1391
1392         // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
1393         // If it happens it may mean Logbook updating failed... let's do it now!
1394         if (status->GetStatus() == AliShuttleStatus::kDone ||
1395             status->GetStatus() == AliShuttleStatus::kFailed){
1396                 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
1397                                         fCurrentDetector.Data(),
1398                                         status->GetStatusName(status->GetStatus())));
1399                 UpdateShuttleLogbook(fCurrentDetector.Data(),
1400                                         status->GetStatusName(status->GetStatus()));
1401                 return kFALSE;
1402         }
1403
1404         if (status->GetStatus() == AliShuttleStatus::kStoreStarted || status->GetStatus() == AliShuttleStatus::kStoreError) {
1405                 Log("SHUTTLE",
1406                         Form("ContinueProcessing - %s: Grid storage of one or more "
1407                                 "objects failed. Trying again now",
1408                                 fCurrentDetector.Data()));
1409                 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
1410                 if (StoreOCDB()){
1411                         Log("SHUTTLE", Form("ContinueProcessing - %s: all objects "
1412                                 "successfully stored into main storage",
1413                                 fCurrentDetector.Data()));
1414                         UpdateShuttleStatus(AliShuttleStatus::kDone);
1415                         UpdateShuttleLogbook(fCurrentDetector, "DONE");
1416                 } else {
1417                         Log("SHUTTLE",
1418                                 Form("ContinueProcessing - %s: Grid storage failed again",
1419                                         fCurrentDetector.Data()));
1420                         UpdateShuttleStatus(AliShuttleStatus::kStoreError);
1421                 }
1422                 return kFALSE;
1423         }
1424
1425         // if we get here, there is a restart
1426         Bool_t cont = kFALSE;
1427
1428         // abort conditions
1429         if (status->GetCount() >= fConfig->GetMaxRetries()) {
1430                 Log("SHUTTLE", Form("ContinueProcessing - %s failed %d times in status %s - "
1431                                 "Updating Shuttle Logbook", fCurrentDetector.Data(),
1432                                 status->GetCount(), status->GetStatusName()));
1433                 UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
1434                 UpdateShuttleStatus(AliShuttleStatus::kFailed);
1435
1436                 // there may still be objects in local OCDB and reference storage
1437                 // and FXS databases may be not updated: do it now!
1438                 
1439                 // TODO Currently disabled, we want to keep files in case of failure!
1440                 // CleanLocalStorage(fgkLocalCDB);
1441                 // CleanLocalStorage(fgkLocalRefStorage);
1442                 // UpdateTableFailCase();
1443                 
1444                 // Send mail to detector expert!
1445                 Log("SHUTTLE", Form("ContinueProcessing - Sending mail to %s expert...", 
1446                                         fCurrentDetector.Data()));
1447                 if (!SendMail())
1448                         Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert",
1449                                         fCurrentDetector.Data()));
1450
1451         } else {
1452                 Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. "
1453                                 "Aborted before with %s. Retry number %d.", fCurrentDetector.Data(),
1454                                 status->GetStatusName(), status->GetCount()));
1455                 Bool_t increaseCount = kTRUE;
1456                 if (status->GetStatus() == AliShuttleStatus::kDCSError || 
1457                         status->GetStatus() == AliShuttleStatus::kDCSStarted)
1458                                 increaseCount = kFALSE;
1459                                 
1460                 UpdateShuttleStatus(AliShuttleStatus::kStarted, increaseCount);
1461                 cont = kTRUE;
1462         }
1463
1464         return cont;
1465 }
1466
1467 //______________________________________________________________________________________________
1468 Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
1469 {
1470         //
1471         // Makes data retrieval for all detectors in the configuration.
1472         // entry: Shuttle logbook entry, contains run paramenters and status of detectors
1473         // (Unprocessed, Inactive, Failed or Done).
1474         // Returns kFALSE in case of error occured and kTRUE otherwise
1475         //
1476
1477         if (!entry) return kFALSE;
1478
1479         fLogbookEntry = entry;
1480
1481         Log("SHUTTLE", Form("\t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^*",
1482                                         GetCurrentRun()));
1483
1484         // Send the information to ML
1485         TMonaLisaText  mlStatus("SHUTTLE_status", "Processing");
1486         TMonaLisaText  mlRunType("SHUTTLE_runtype", Form("%s (%s)", entry->GetRunType(), entry->GetRunParameter("log")));
1487
1488         TList mlList;
1489         mlList.Add(&mlStatus);
1490         mlList.Add(&mlRunType);
1491
1492         TString mlID;
1493         mlID.Form("%d", GetCurrentRun());
1494         fMonaLisa->SendParameters(&mlList, mlID);
1495
1496         if (fLogbookEntry->IsDone())
1497         {
1498                 Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook");
1499                 UpdateShuttleLogbook("shuttle_done");
1500                 fLogbookEntry = 0;
1501                 return kTRUE;
1502         }
1503
1504         // read test mode if flag is set
1505         if (fReadTestMode)
1506         {
1507                 fTestMode = kNone;
1508                 TString logEntry(entry->GetRunParameter("log"));
1509                 //printf("log entry = %s\n", logEntry.Data());
1510                 TString searchStr("Testmode: ");
1511                 Int_t pos = logEntry.Index(searchStr.Data());
1512                 //printf("%d\n", pos);
1513                 if (pos >= 0)
1514                 {
1515                         TSubString subStr = logEntry(pos + searchStr.Length(), logEntry.Length());
1516                         //printf("%s\n", subStr.String().Data());
1517                         TString newStr(subStr.Data());
1518                         TObjArray* token = newStr.Tokenize(' ');
1519                         if (token)
1520                         {
1521                                 //token->Print();
1522                                 TObjString* tmpStr = dynamic_cast<TObjString*> (token->First());
1523                                 if (tmpStr)
1524                                 {
1525                                         Int_t testMode = tmpStr->String().Atoi();
1526                                         if (testMode > 0)
1527                                         {
1528                                                 Log("SHUTTLE", Form("Process - Enabling test mode %d", testMode));
1529                                                 SetTestMode((TestMode) testMode);
1530                                         }
1531                                 }
1532                                 delete token;          
1533                         }
1534                 }
1535         }
1536                 
1537         fLogbookEntry->Print("all");
1538
1539         // Initialization
1540         Bool_t hasError = kFALSE;
1541
1542         // Set the CDB and Reference folders according to the year and LHC period
1543         TString lhcPeriod(GetLHCPeriod());
1544         if (lhcPeriod.Length() == 0) 
1545         {
1546                 Log("SHUTTLE","Process - LHCPeriod not found in logbook!");
1547                 return 0; 
1548         }       
1549         
1550         if (fgkMainCDB.Length() == 0)
1551                 fgkMainCDB = Form("alien://folder=/alice/data/%d/%s/OCDB?user=alidaq?cacheFold=/tmp/OCDBCache", 
1552                                         GetCurrentYear(), lhcPeriod.Data());
1553         
1554         if (fgkMainRefStorage.Length() == 0)
1555                 fgkMainRefStorage = Form("alien://folder=/alice/data/%d/%s/Reference?user=alidaq?cacheFold=/tmp/OCDBCache", 
1556                                         GetCurrentYear(), lhcPeriod.Data());
1557         
1558         // Loop on detectors in the configuration
1559         TIter iter(fConfig->GetDetectors());
1560         TObjString* aDetector = 0;
1561
1562         Bool_t first = kTRUE;
1563
1564         while ((aDetector = (TObjString*) iter.Next()))
1565         {
1566                 fCurrentDetector = aDetector->String();
1567
1568                 if (ContinueProcessing() == kFALSE) continue;
1569                 
1570                 if (first)
1571                 {
1572                   // only read QueryCDB when needed and only once
1573                   AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
1574                   if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
1575                   AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
1576                   if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
1577                   first = kFALSE;
1578                 }
1579
1580                 Log("SHUTTLE", Form("\t\t\t****** run %d - %s: START  ******",
1581                                                 GetCurrentRun(), aDetector->GetName()));
1582
1583                 for(Int_t iSys=0;iSys<3;iSys++) fFXSCalled[iSys]=kFALSE;
1584
1585                 Log(fCurrentDetector.Data(), "Process - Starting processing");
1586
1587                 Int_t pid = fork();
1588
1589                 if (pid < 0)
1590                 {
1591                         Log("SHUTTLE", "Process - ERROR: Forking failed");
1592                 }
1593                 else if (pid > 0)
1594                 {
1595                         // parent
1596                         Log("SHUTTLE", Form("Process - In parent process of %d - %s: Starting monitoring",
1597                                                         GetCurrentRun(), aDetector->GetName()));
1598
1599                         Long_t begin = time(0);
1600
1601                         int status; // to be used with waitpid, on purpose an int (not Int_t)!
1602                         while (waitpid(pid, &status, WNOHANG) == 0)
1603                         {
1604                                 Long_t expiredTime = time(0) - begin;
1605
1606                                 if (expiredTime > fConfig->GetPPTimeOut())
1607                                 {
1608                                         TString tmp;
1609                                         tmp.Form("Process - Process of %s time out. "
1610                                                         "Run time: %d seconds. Killing...",
1611                                                         fCurrentDetector.Data(), expiredTime);
1612                                         Log("SHUTTLE", tmp);
1613                                         Log(fCurrentDetector, tmp);
1614
1615                                         kill(pid, 9);
1616
1617                                         UpdateShuttleStatus(AliShuttleStatus::kPPTimeOut);
1618                                         hasError = kTRUE;
1619
1620                                         gSystem->Sleep(1000);
1621                                 }
1622                                 else
1623                                 {
1624                                         gSystem->Sleep(1000);
1625                                         
1626                                         TString checkStr;
1627                                         checkStr.Form("ps -o vsize --pid %d | tail -n 1", pid);
1628                                         FILE* pipe = gSystem->OpenPipe(checkStr, "r");
1629                                         if (!pipe)
1630                                         {
1631                                                 Log("SHUTTLE", Form("Process - Error: "
1632                                                         "Could not open pipe to %s", checkStr.Data()));
1633                                                 continue;
1634                                         }
1635                                                 
1636                                         char buffer[100];
1637                                         if (!fgets(buffer, 100, pipe))
1638                                         {
1639                                                 Log("SHUTTLE", "Process - Error: ps did not return anything");
1640                                                 gSystem->ClosePipe(pipe);
1641                                                 continue;
1642                                         }
1643                                         gSystem->ClosePipe(pipe);
1644                                         
1645                                         //Log("SHUTTLE", Form("ps returned %s", buffer));
1646                                         
1647                                         Int_t mem = 0;
1648                                         if ((sscanf(buffer, "%d\n", &mem) != 1) || !mem)
1649                                         {
1650                                                 Log("SHUTTLE", "Process - Error: Could not parse output of ps");
1651                                                 continue;
1652                                         }
1653                                         
1654                                         if (expiredTime % 60 == 0)
1655                                         {
1656                                                 Log("SHUTTLE", Form("Process - %s: Checking process. "
1657                                                         "Run time: %d seconds - Memory consumption: %d KB",
1658                                                         fCurrentDetector.Data(), expiredTime, mem));
1659                                                 SendAlive();
1660                                         }
1661                                         
1662                                         if (mem > fConfig->GetPPMaxMem())
1663                                         {
1664                                                 TString tmp;
1665                                                 tmp.Form("Process - Process exceeds maximum allowed memory "
1666                                                         "(%d KB > %d KB). Killing...",
1667                                                         mem, fConfig->GetPPMaxMem());
1668                                                 Log("SHUTTLE", tmp);
1669                                                 Log(fCurrentDetector, tmp);
1670         
1671                                                 kill(pid, 9);
1672         
1673                                                 UpdateShuttleStatus(AliShuttleStatus::kPPOutOfMemory);
1674                                                 hasError = kTRUE;
1675         
1676                                                 gSystem->Sleep(1000);
1677                                         }
1678                                 }
1679                         }
1680
1681                         Log("SHUTTLE", Form("Process - In parent process of %d - %s: Client has terminated.",
1682                                                                 GetCurrentRun(), aDetector->GetName()));
1683
1684                         if (WIFEXITED(status))
1685                         {
1686                                 Int_t returnCode = WEXITSTATUS(status);
1687
1688                                 Log("SHUTTLE", Form("Process - %s: the return code is %d", fCurrentDetector.Data(),
1689                                                                                 returnCode));
1690
1691                                 if (returnCode == 0) hasError = kTRUE;
1692                         }
1693                 }
1694                 else if (pid == 0)
1695                 {
1696                         // client
1697                         Log("SHUTTLE", Form("Process - In client process of %d - %s", GetCurrentRun(),
1698                                 aDetector->GetName()));
1699
1700                         Log("SHUTTLE", Form("Process - Redirecting output to %s log",fCurrentDetector.Data()));
1701
1702                         if ((freopen(GetLogFileName(fCurrentDetector), "a", stdout)) == 0)
1703                         {
1704                                 Log("SHUTTLE", "Process - Could not freopen stdout");
1705                         }
1706                         else
1707                         {
1708                                 fOutputRedirected = kTRUE;
1709                                 if ((dup2(fileno(stdout), fileno(stderr))) < 0)
1710                                         Log("SHUTTLE", "Process - Could not redirect stderr");
1711                                 
1712                         }
1713                         
1714                         TString wd = gSystem->WorkingDirectory();
1715                         TString tmpDir = Form("%s/%s_%d_process", GetShuttleTempDir(), 
1716                                 fCurrentDetector.Data(), GetCurrentRun());
1717                         
1718                         Int_t result = gSystem->GetPathInfo(tmpDir.Data(), 0, (Long64_t*) 0, 0, 0);
1719                         if (!result) // temp dir already exists!
1720                         {
1721                                 Log(fCurrentDetector.Data(), 
1722                                         Form("Process - %s dir already exists! Removing...", tmpDir.Data()));
1723                                 gSystem->Exec(Form("rm -rf %s",tmpDir.Data()));         
1724                         } 
1725                         
1726                         if (gSystem->mkdir(tmpDir.Data(), 1))
1727                         {
1728                                 Log(fCurrentDetector.Data(), "Process - could not make temp directory!!");
1729                                 gSystem->Exit(1);
1730                         }
1731                         
1732                         if (!gSystem->ChangeDirectory(tmpDir.Data())) 
1733                         {
1734                                 Log(fCurrentDetector.Data(), "Process - could not change directory!!");
1735                                 gSystem->Exit(1);                       
1736                         }
1737                         
1738                         Bool_t success = ProcessCurrentDetector();
1739                         
1740                         gSystem->ChangeDirectory(wd.Data());
1741                                                 
1742                         if (success) // Preprocessor finished successfully!
1743                         { 
1744                                 // remove temporary folder
1745                                 // temporary commented (JF)
1746                                 //gSystem->Exec(Form("rm -rf %s",tmpDir.Data()));
1747                                 
1748                                 // Update time_processed field in FXS DB
1749                                 if (UpdateTable() == kFALSE)
1750                                         Log("SHUTTLE", Form("Process - %s: Could not update FXS databases!", 
1751                                                         fCurrentDetector.Data()));
1752
1753                                 // Transfer the data from local storage to main storage (Grid)
1754                                 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
1755                                 if (StoreOCDB() == kFALSE)
1756                                 {
1757                                         Log("SHUTTLE", 
1758                                                 Form("\t\t\t****** run %d - %s: STORAGE ERROR ******",
1759                                                         GetCurrentRun(), aDetector->GetName()));
1760                                         UpdateShuttleStatus(AliShuttleStatus::kStoreError);
1761                                         success = kFALSE;
1762                                 } else {
1763                                         Log("SHUTTLE", 
1764                                                 Form("\t\t\t****** run %d - %s: DONE ******",
1765                                                         GetCurrentRun(), aDetector->GetName()));
1766                                         UpdateShuttleStatus(AliShuttleStatus::kDone);
1767                                         UpdateShuttleLogbook(fCurrentDetector, "DONE");
1768                                 }
1769                         } else 
1770                         {
1771                                 Log("SHUTTLE", 
1772                                         Form("\t\t\t****** run %d - %s: PP ERROR ******",
1773                                                 GetCurrentRun(), aDetector->GetName()));
1774                         }
1775
1776                         for (UInt_t iSys=0; iSys<3; iSys++)
1777                         {
1778                                 if (fFXSCalled[iSys]) fFXSlist[iSys].Clear();
1779                         }
1780
1781                         Log("SHUTTLE", Form("Process - Client process of %d - %s is exiting now with %d.",
1782                                                         GetCurrentRun(), aDetector->GetName(), success));
1783
1784                         // the client exits here
1785                         gSystem->Exit(success);
1786
1787                         AliError("We should never get here!!!");
1788                 }
1789         }
1790
1791         Log("SHUTTLE", Form("\t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^*",
1792                                                         GetCurrentRun()));
1793
1794         //check if shuttle is done for this run, if so update logbook
1795         TObjArray checkEntryArray;
1796         checkEntryArray.SetOwner(1);
1797         TString whereClause = Form("where run=%d", GetCurrentRun());
1798         if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || 
1799                         checkEntryArray.GetEntries() == 0) {
1800                 Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!",
1801                                                 GetCurrentRun()));
1802                 return hasError == kFALSE;
1803         }
1804
1805         AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*>
1806                                                 (checkEntryArray.At(0));
1807
1808         if (checkEntry)
1809         {
1810                 if (checkEntry->IsDone())
1811                 {
1812                         Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook");
1813                         UpdateShuttleLogbook("shuttle_done");
1814                 }
1815                 else
1816                 {
1817                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1818                         {
1819                                 if (checkEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
1820                                 {
1821                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
1822                                                         checkEntry->GetRun(), GetDetName(iDet)));
1823                                         fFirstUnprocessed[iDet] = kFALSE;
1824                                 }
1825                         }
1826                 }
1827         }
1828
1829         fLogbookEntry = 0;
1830
1831         return hasError == kFALSE;
1832 }
1833
1834 //______________________________________________________________________________________________
1835 Bool_t AliShuttle::ProcessCurrentDetector()
1836 {
1837         //
1838         // Makes data retrieval just for a specific detector (fCurrentDetector).
1839         // Threre should be a configuration for this detector.
1840
1841         Log("SHUTTLE", Form("ProcessCurrentDetector - Retrieving values for %s, run %d", 
1842                                                 fCurrentDetector.Data(), GetCurrentRun()));
1843
1844         TString wd = gSystem->WorkingDirectory();
1845         
1846         if (!CleanReferenceStorage(fCurrentDetector.Data()))
1847                 return kFALSE;
1848         
1849         gSystem->ChangeDirectory(wd.Data());
1850         
1851         TMap* dcsMap = new TMap();
1852
1853         // call preprocessor
1854         AliPreprocessor* aPreprocessor =
1855                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
1856
1857         aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
1858
1859         Bool_t processDCS = aPreprocessor->ProcessDCS();
1860
1861         if (!processDCS)
1862         {
1863                 Log(fCurrentDetector, "ProcessCurrentDetector -"
1864                         " The preprocessor requested to skip the retrieval of DCS values");
1865         }
1866         else if (fTestMode & kSkipDCS)
1867         {
1868                 Log(fCurrentDetector, "ProcessCurrentDetector - In TESTMODE: Skipping DCS processing");
1869         } 
1870         else if (fTestMode & kErrorDCS)
1871         {
1872                 Log(fCurrentDetector, "ProcessCurrentDetector - In TESTMODE: Simulating DCS error");
1873                 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
1874                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1875                 delete dcsMap;
1876                 return kFALSE;
1877         } else {
1878
1879                 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
1880
1881                 // Query DCS archive
1882                 Int_t nServers = fConfig->GetNServers(fCurrentDetector);
1883                 
1884                 for (int iServ=0; iServ<nServers; iServ++)
1885                 {
1886                 
1887                         TString host(fConfig->GetDCSHost(fCurrentDetector, iServ));
1888                         Int_t port = fConfig->GetDCSPort(fCurrentDetector, iServ);
1889                         Int_t multiSplit = fConfig->GetMultiSplit(fCurrentDetector, iServ);
1890
1891                         Log(fCurrentDetector, Form("ProcessCurrentDetector -"
1892                                         " Querying DCS Amanda server %s:%d (%d of %d)", 
1893                                         host.Data(), port, iServ+1, nServers));
1894                         
1895                         TMap* aliasMap = 0;
1896                         TMap* dpMap = 0;
1897         
1898                         if (fConfig->GetDCSAliases(fCurrentDetector, iServ)->GetEntries() > 0)
1899                         {
1900                                 aliasMap = GetValueSet(host, port, 
1901                                                 fConfig->GetDCSAliases(fCurrentDetector, iServ), 
1902                                                 kAlias, multiSplit);
1903                                 if (!aliasMap)
1904                                 {
1905                                         Log(fCurrentDetector, 
1906                                                 Form("ProcessCurrentDetector -"
1907                                                         " Error retrieving DCS aliases from server %s."
1908                                                         " Sending mail to DCS experts!", host.Data()));
1909                                         UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1910                                         
1911                                         //if (!SendMailToDCS())
1912                                         //      Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!"));
1913
1914                                         delete dcsMap;
1915                                         return kFALSE;
1916                                 }
1917                         }
1918                         
1919                         if (fConfig->GetDCSDataPoints(fCurrentDetector, iServ)->GetEntries() > 0)
1920                         {
1921                                 dpMap = GetValueSet(host, port, 
1922                                                 fConfig->GetDCSDataPoints(fCurrentDetector, iServ), 
1923                                                 kDP, multiSplit);
1924                                 if (!dpMap)
1925                                 {
1926                                         Log(fCurrentDetector, 
1927                                                 Form("ProcessCurrentDetector -"
1928                                                         " Error retrieving DCS data points from server %s."
1929                                                         " Sending mail to DCS experts!", host.Data()));
1930                                         UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1931                                         
1932                                         //if (!SendMailToDCS())
1933                                         //      Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!"));
1934                                         
1935                                         if (aliasMap) delete aliasMap;
1936                                         delete dcsMap;
1937                                         return kFALSE;
1938                                 }                               
1939                         }
1940                         
1941                         // merge aliasMap and dpMap into dcsMap
1942                         if(aliasMap) {
1943                                 TIter iter(aliasMap);
1944                                 TObjString* key = 0;
1945                                 while ((key = (TObjString*) iter.Next()))
1946                                         dcsMap->Add(key, aliasMap->GetValue(key->String()));
1947                                 
1948                                 aliasMap->SetOwner(kFALSE);
1949                                 delete aliasMap;
1950                         }       
1951                         
1952                         if(dpMap) {
1953                                 TIter iter(dpMap);
1954                                 TObjString* key = 0;
1955                                 while ((key = (TObjString*) iter.Next()))
1956                                         dcsMap->Add(key, dpMap->GetValue(key->String()));
1957                                 
1958                                 dpMap->SetOwner(kFALSE);
1959                                 delete dpMap;
1960                         }
1961                 }
1962         }
1963         
1964         // save map into file, to help debugging in case of preprocessor error
1965         TFile* f = TFile::Open("DCSMap.root","recreate");
1966         f->cd();
1967         dcsMap->Write("DCSMap", TObject::kSingleKey);
1968         f->Close();
1969         delete f;
1970         
1971         // DCS Archive DB processing successful. Call Preprocessor!
1972         UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
1973
1974         UInt_t returnValue = aPreprocessor->Process(dcsMap);
1975
1976         if (returnValue > 0) // Preprocessor error!
1977         {
1978                 Log(fCurrentDetector, Form("ProcessCurrentDetector - "
1979                                 "Preprocessor failed. Process returned %d.", returnValue));
1980                 UpdateShuttleStatus(AliShuttleStatus::kPPError);
1981                 dcsMap->DeleteAll();
1982                 delete dcsMap;
1983                 return kFALSE;
1984         }
1985         
1986         // preprocessor ok!
1987         UpdateShuttleStatus(AliShuttleStatus::kPPDone);
1988         Log(fCurrentDetector, Form("ProcessCurrentDetector - %s preprocessor returned success",
1989                                 fCurrentDetector.Data()));
1990
1991         dcsMap->DeleteAll();
1992         delete dcsMap;
1993
1994         return kTRUE;
1995 }
1996
1997 //______________________________________________________________________________________________
1998 void AliShuttle::CountOpenRuns()
1999 {
2000         // Query DAQ's Shuttle logbook and sends the number of open runs to ML
2001         
2002         // check connection, in case connect
2003         if (!Connect(3)) 
2004                 return;
2005
2006         TString sqlQuery;
2007         sqlQuery = Form("select count(*) from %s where shuttle_done=0", fConfig->GetShuttlelbTable());
2008         
2009         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
2010         if (!aResult) {
2011                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
2012                 return;
2013         }
2014
2015         AliDebug(2,Form("Query = %s", sqlQuery.Data()));
2016         
2017         if (aResult->GetRowCount() == 0) {
2018                 AliError(Form("No result for query %s received", sqlQuery.Data()));
2019                 return;
2020         }
2021
2022         if (aResult->GetFieldCount() != 1) {
2023                 AliError(Form("Invalid field count for query %s received", sqlQuery.Data()));
2024                 return;
2025         }
2026
2027         TSQLRow* aRow = aResult->Next();
2028         if (!aRow) {
2029                 AliError(Form("Could not receive result of query %s", sqlQuery.Data()));
2030                 return;
2031         }
2032         
2033         TString result(aRow->GetField(0), aRow->GetFieldLength(0));
2034         Int_t count = result.Atoi();
2035         
2036         Log("SHUTTLE", Form("%d unprocessed runs", count));
2037         
2038         delete aRow;
2039         delete aResult;
2040
2041         TMonaLisaValue mlStatus("SHUTTLE_openruns", count);
2042
2043         TList mlList;
2044         mlList.Add(&mlStatus);
2045
2046         fMonaLisa->SendParameters(&mlList, "__PROCESSINGINFO__");
2047 }
2048
2049 //______________________________________________________________________________________________
2050 Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
2051                 TObjArray& entries)
2052 {
2053         // Query DAQ's Shuttle logbook and fills detector status object.
2054         // Call QueryRunParameters to query DAQ logbook for run parameters.
2055         //
2056
2057         entries.SetOwner(1);
2058
2059         // check connection, in case connect
2060         if (!Connect(3)) return kFALSE;
2061
2062         TString sqlQuery;
2063         sqlQuery = Form("select * from %s %s order by run", fConfig->GetShuttlelbTable(), whereClause);
2064
2065         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
2066         if (!aResult) {
2067                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
2068                 return kFALSE;
2069         }
2070
2071         AliDebug(2,Form("Query = %s", sqlQuery.Data()));
2072
2073         if(aResult->GetRowCount() == 0) {
2074                 Log("SHUTTLE", "No entries in Shuttle Logbook match request");
2075                 delete aResult;
2076                 return kTRUE;
2077         }
2078
2079         // TODO Check field count!
2080         const UInt_t nCols = 23;
2081         if (aResult->GetFieldCount() != (Int_t) nCols) {
2082                 Log("SHUTTLE", "Invalid SQL result field number!");
2083                 delete aResult;
2084                 return kFALSE;
2085         }
2086
2087         TSQLRow* aRow;
2088         while ((aRow = aResult->Next())) {
2089                 TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
2090                 Int_t run = runString.Atoi();
2091
2092                 AliShuttleLogbookEntry *entry = QueryRunParameters(run);
2093                 if (!entry)
2094                         continue;
2095
2096                 // loop on detectors
2097                 for(UInt_t ii = 0; ii < nCols; ii++)
2098                         entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii));
2099
2100                 entries.AddLast(entry);
2101                 delete aRow;
2102         }
2103
2104         delete aResult;
2105         return kTRUE;
2106 }
2107
2108 //______________________________________________________________________________________________
2109 AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
2110 {
2111         //
2112         // Retrieve run parameters written in the DAQ logbook and sets them into AliShuttleLogbookEntry object
2113         //
2114
2115         // check connection, in case connect
2116         if (!Connect(3))
2117                 return 0;
2118
2119         TString sqlQuery;
2120         sqlQuery.Form("select * from %s where run=%d", fConfig->GetDAQlbTable(), run);
2121
2122         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
2123         if (!aResult) {
2124                 Log("SHUTTLE", Form("Can't execute query <%s>!", sqlQuery.Data()));
2125                 return 0;
2126         }
2127
2128         if (aResult->GetRowCount() == 0) {
2129                 Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run));
2130                 delete aResult;
2131                 return 0;
2132         }
2133
2134         if (aResult->GetRowCount() > 1) {
2135                 Log("SHUTTLE", Form("QueryRunParameters - UNEXPECTED: "
2136                                 "more than one entry in DAQ Logbook for run %d!", run));
2137                 delete aResult;
2138                 return 0;
2139         }
2140
2141         TSQLRow* aRow = aResult->Next();
2142         if (!aRow)
2143         {
2144                 Log("SHUTTLE", Form("QueryRunParameters - Could not retrieve row for run %d. Skipping", run));
2145                 delete aResult;
2146                 return 0;
2147         }
2148
2149         AliShuttleLogbookEntry* entry = new AliShuttleLogbookEntry(run);
2150
2151         for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
2152                 entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
2153
2154         UInt_t startTime = entry->GetStartTime();
2155         UInt_t endTime = entry->GetEndTime();
2156
2157 //      if (!startTime || !endTime || startTime > endTime) 
2158 //      {
2159 //              Log("SHUTTLE",
2160 //                      Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d. Skipping!",
2161 //                              run, startTime, endTime));              
2162 //              
2163 //              Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
2164 //              fLogbookEntry = entry;  
2165 //              if (!UpdateShuttleLogbook("shuttle_done"))
2166 //              {
2167 //                      AliError(Form("Could not update logbook for run %d !", run));
2168 //              }
2169 //              fLogbookEntry = 0;
2170 //                              
2171 //              delete entry;
2172 //              delete aRow;
2173 //              delete aResult;
2174 //              return 0;
2175 //      }
2176
2177         if (!startTime) 
2178         {
2179                 Log("SHUTTLE",
2180                         Form("QueryRunParameters - Invalid parameters for Run %d: " 
2181                                 "startTime = %d, endTime = %d. Skipping!",
2182                                         run, startTime, endTime));              
2183                 
2184                 Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
2185                 fLogbookEntry = entry;  
2186                 if (!UpdateShuttleLogbook("shuttle_ignored"))
2187                 {
2188                         AliError(Form("Could not update logbook for run %d !", run));
2189                 }
2190                 fLogbookEntry = 0;
2191                                 
2192                 delete entry;
2193                 delete aRow;
2194                 delete aResult;
2195                 return 0;
2196         }
2197         
2198         if (startTime && !endTime) 
2199         {
2200                 // TODO Here we don't mark SHUTTLE done, because this may mean 
2201                 //the run is still ongoing!!            
2202                 Log("SHUTTLE",
2203                         Form("QueryRunParameters - Invalid parameters for Run %d: "
2204                              "startTime = %d, endTime = %d. Skipping (Shuttle won't be marked as DONE)!",
2205                                         run, startTime, endTime));              
2206                 
2207                 //Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
2208                 //fLogbookEntry = entry;        
2209                 //if (!UpdateShuttleLogbook("shuttle_done"))
2210                 //{
2211                 //      AliError(Form("Could not update logbook for run %d !", run));
2212                 //}
2213                 //fLogbookEntry = 0;
2214                                 
2215                 delete entry;
2216                 delete aRow;
2217                 delete aResult;
2218                 return 0;
2219         }
2220                         
2221         if (startTime && endTime && (startTime > endTime)) 
2222         {
2223                 Log("SHUTTLE",
2224                         Form("QueryRunParameters - Invalid parameters for Run %d: "
2225                                 "startTime = %d, endTime = %d. Skipping!",
2226                                         run, startTime, endTime));              
2227                 
2228                 Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
2229                 fLogbookEntry = entry;  
2230                 if (!UpdateShuttleLogbook("shuttle_ignored"))
2231                 {
2232                         AliError(Form("Could not update logbook for run %d !", run));
2233                 }
2234                 fLogbookEntry = 0;
2235                                 
2236                 delete entry;
2237                 delete aRow;
2238                 delete aResult;
2239                 return 0;
2240         }
2241                         
2242         TString totEventsStr = entry->GetRunParameter("totalEvents");  
2243         Int_t totEvents = totEventsStr.Atoi();
2244         if (totEvents < 1) 
2245         {
2246                 Log("SHUTTLE",
2247                         Form("QueryRunParameters - Run %d has 0 events - Skipping!", run));             
2248                 
2249                 Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));           
2250                 fLogbookEntry = entry;  
2251                 if (!UpdateShuttleLogbook("shuttle_ignored"))
2252                 {
2253                         AliError(Form("Could not update logbook for run %d !", run));
2254                 }
2255                 fLogbookEntry = 0;
2256                                 
2257                 delete entry;
2258                 delete aRow;
2259                 delete aResult;
2260                 return 0;
2261         }
2262
2263         delete aRow;
2264         delete aResult;
2265
2266         return entry;
2267 }
2268
2269 //______________________________________________________________________________________________
2270 TMap* AliShuttle::GetValueSet(const char* host, Int_t port, const TSeqCollection* entries,
2271                               DCSType type, Int_t multiSplit)
2272 {
2273         // Retrieve all "entry" data points from the DCS server
2274         // host, port: TSocket connection parameters
2275         // entries: list of name of the alias or data point
2276         // type: kAlias or kDP
2277         // returns TMap of values, 0 when failure
2278         
2279         AliDCSClient client(host, port, fTimeout, fRetries, multiSplit);
2280
2281         TMap* result = 0;
2282         if (type == kAlias)
2283         {
2284                 result = client.GetAliasValues(entries, GetCurrentStartTime(), 
2285                         GetCurrentEndTime());
2286         } 
2287         else if (type == kDP)
2288         {
2289                 result = client.GetDPValues(entries, GetCurrentStartTime(), 
2290                         GetCurrentEndTime());
2291         }
2292
2293         if (result == 0)
2294         {
2295                 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get entries! Reason: %s",
2296                         client.GetErrorString(client.GetResultErrorCode())));
2297                 if (client.GetResultErrorCode() == AliDCSClient::fgkServerError)        
2298                         Log(fCurrentDetector.Data(), Form("GetValueSet - Server error code: %s",
2299                                 client.GetServerError().Data()));
2300
2301                 return 0;
2302         }
2303                 
2304         return result;
2305 }
2306
2307 //______________________________________________________________________________________________
2308 const char* AliShuttle::GetFile(Int_t system, const char* detector,
2309                 const char* id, const char* source)
2310 {
2311         // Get calibration file from file exchange servers
2312         // First queris the FXS database for the file name, using the run, detector, id and source info
2313         // then calls RetrieveFile(filename) for actual copy to local disk
2314         // run: current run being processed (given by Logbook entry fLogbookEntry)
2315         // detector: the Preprocessor name
2316         // id: provided as a parameter by the Preprocessor
2317         // source: provided by the Preprocessor through GetFileSources function
2318
2319         // check if test mode should simulate a FXS error
2320         if (fTestMode & kErrorFXSFiles)
2321         {
2322                 Log(detector, Form("GetFile - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2323                 return 0;
2324         }
2325         
2326         // check connection, in case connect
2327         if (!Connect(system))
2328         {
2329                 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
2330                 return 0;
2331         }
2332
2333         // Query preparation
2334         TString sourceName(source);
2335         Int_t nFields = 3;
2336         TString sqlQueryStart = Form("select filePath,size,fileChecksum from %s where",
2337                                                                 fConfig->GetFXSdbTable(system));
2338         TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
2339                                                                 GetCurrentRun(), detector, id);
2340
2341         if (system == kDAQ)
2342         {
2343                 whereClause += Form(" and DAQsource=\"%s\"", source);
2344         }
2345         else if (system == kDCS)
2346         {
2347                 sourceName="none";
2348         }
2349         else if (system == kHLT)
2350         {
2351                 whereClause += Form(" and DDLnumbers=\"%s\"", source);
2352                 nFields = 3;
2353         }
2354
2355         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2356
2357         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2358
2359         // Query execution
2360         TSQLResult* aResult = 0;
2361         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2362         if (!aResult) {
2363                 Log(detector, Form("GetFileName - Can't execute SQL query to %s database for: id = %s, source = %s",
2364                                 GetSystemName(system), id, sourceName.Data()));
2365                 return 0;
2366         }
2367
2368         if(aResult->GetRowCount() == 0)
2369         {
2370                 Log(detector,
2371                         Form("GetFileName - No entry in %s FXS db for: id = %s, source = %s",
2372                                 GetSystemName(system), id, sourceName.Data()));
2373                 delete aResult;
2374                 return 0;
2375         }
2376
2377         if (aResult->GetRowCount() > 1) {
2378                 Log(detector,
2379                         Form("GetFileName - More than one entry in %s FXS db for: id = %s, source = %s",
2380                                 GetSystemName(system), id, sourceName.Data()));
2381                 delete aResult;
2382                 return 0;
2383         }
2384
2385         if (aResult->GetFieldCount() != nFields) {
2386                 Log(detector,
2387                         Form("GetFileName - Wrong field count in %s FXS db for: id = %s, source = %s",
2388                                 GetSystemName(system), id, sourceName.Data()));
2389                 delete aResult;
2390                 return 0;
2391         }
2392
2393         TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
2394
2395         if (!aRow){
2396                 Log(detector, Form("GetFileName - Empty set result in %s FXS db from query: id = %s, source = %s",
2397                                 GetSystemName(system), id, sourceName.Data()));
2398                 delete aResult;
2399                 return 0;
2400         }
2401
2402         TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
2403         TString fileSize(aRow->GetField(1), aRow->GetFieldLength(1));
2404         TString fileChecksum(aRow->GetField(2), aRow->GetFieldLength(2));
2405
2406         delete aResult;
2407         delete aRow;
2408
2409         AliDebug(2, Form("filePath = %s; size = %s, fileChecksum = %s",
2410                                 filePath.Data(), fileSize.Data(), fileChecksum.Data()));
2411
2412         // retrieved file is renamed to make it unique
2413         TString localFileName = Form("%s/%s_%d_process/%s_%s_%d_%s_%s.shuttle",
2414                                         GetShuttleTempDir(), detector, GetCurrentRun(),
2415                                         GetSystemName(system), detector, GetCurrentRun(), 
2416                                         id, sourceName.Data());
2417
2418
2419         // file retrieval from FXS
2420         UInt_t nRetries = 0;
2421         UInt_t maxRetries = 3;
2422         Bool_t result = kFALSE;
2423
2424         // copy!! if successful TSystem::Exec returns 0
2425         while(nRetries++ < maxRetries) {
2426                 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
2427                 result = RetrieveFile(system, filePath.Data(), localFileName.Data());
2428                 if(!result)
2429                 {
2430                         Log(detector, Form("GetFileName - Copy of file %s from %s FXS failed",
2431                                         filePath.Data(), GetSystemName(system)));
2432                         continue;
2433                 } 
2434
2435                 if (fileChecksum.Length()>0)
2436                 {
2437                         // compare md5sum of local file with the one stored in the FXS DB
2438                         Int_t md5Comp = gSystem->Exec(Form("md5sum %s |grep %s 2>&1 > /dev/null",
2439                                                 localFileName.Data(), fileChecksum.Data()));
2440
2441                         if (md5Comp != 0)
2442                         {
2443                                 Log(detector, Form("GetFileName - md5sum of file %s does not match with local copy!",
2444                                                         filePath.Data()));
2445                                 result = kFALSE;
2446                                 continue;
2447                         }
2448                 } else {
2449                         Log(fCurrentDetector, Form("GetFile - md5sum of file %s not set in %s database, skipping comparison",
2450                                                         filePath.Data(), GetSystemName(system)));
2451                 }
2452                 if (result) break;
2453         }
2454
2455         if(!result) return 0;
2456
2457         fFXSCalled[system]=kTRUE;
2458         TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, sourceName.Data()));
2459         fFXSlist[system].Add(fileParams);
2460
2461         static TString staticLocalFileName;
2462         staticLocalFileName.Form("%s", localFileName.Data());
2463         
2464         Log(fCurrentDetector, Form("GetFile - Retrieved file with id %s and "
2465                         "source %s from %s to %s", id, source, 
2466                         GetSystemName(system), localFileName.Data()));
2467                         
2468         return staticLocalFileName.Data();
2469 }
2470
2471 //______________________________________________________________________________________________
2472 Bool_t AliShuttle::RetrieveFile(UInt_t system, const char* fxsFileName, const char* localFileName)
2473 {
2474         //
2475         // Copies file from FXS to local Shuttle machine
2476         //
2477
2478         // check temp directory: trying to cd to temp; if it does not exist, create it
2479         AliDebug(2, Form("Copy file %s from %s FXS into %s",
2480                         GetSystemName(system), fxsFileName, localFileName));
2481                         
2482         TString tmpDir(localFileName);
2483         
2484         tmpDir = tmpDir(0,tmpDir.Last('/'));
2485
2486         Int_t noDir = gSystem->GetPathInfo(tmpDir.Data(), 0, (Long64_t*) 0, 0, 0);
2487         if (noDir) // temp dir does not exists!
2488         {
2489                 if (gSystem->mkdir(tmpDir.Data(), 1))
2490                 {
2491                         Log(fCurrentDetector.Data(), "RetrieveFile - could not make temp directory!!");
2492                         return kFALSE;
2493                 }
2494         }
2495
2496         TString baseFXSFolder;
2497         if (system == kDAQ)
2498         {
2499                 baseFXSFolder = "FES/";
2500         }
2501         else if (system == kDCS)
2502         {
2503                 baseFXSFolder = "";
2504         }
2505         else if (system == kHLT)
2506         {
2507                 baseFXSFolder = "/opt/FXS/";
2508         }
2509
2510
2511         TString command = Form("scp -oPort=%d -2 %s@%s:%s%s %s",
2512                 fConfig->GetFXSPort(system),
2513                 fConfig->GetFXSUser(system),
2514                 fConfig->GetFXSHost(system),
2515                 baseFXSFolder.Data(),
2516                 fxsFileName,
2517                 localFileName);
2518
2519         AliDebug(2, Form("%s",command.Data()));
2520
2521         Bool_t result = (gSystem->Exec(command.Data()) == 0);
2522
2523         return result;
2524 }
2525
2526 //______________________________________________________________________________________________
2527 TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
2528 {
2529         //
2530         // Get sources producing the condition file Id from file exchange servers
2531         // if id is NULL all sources are returned (distinct)
2532         //
2533
2534         if (id)
2535         {
2536                 Log(detector, Form("GetFileSources - Querying %s FXS for files with id %s produced by %s", GetSystemName(system), id, detector));
2537         } else {
2538                 Log(detector, Form("GetFileSources - Querying %s FXS for files produced by %s", GetSystemName(system), detector));
2539         }
2540         
2541         // check if test mode should simulate a FXS error
2542         if (fTestMode & kErrorFXSSources)
2543         {
2544                 Log(detector, Form("GetFileSources - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2545                 return 0;
2546         }
2547
2548         if (system == kDCS)
2549         {
2550                 Log(detector, "GetFileSources - WARNING: DCS system has only one source of data!");
2551                 TList *list = new TList();
2552                 list->SetOwner(1);
2553                 list->Add(new TObjString(" "));
2554                 return list;
2555         }
2556
2557         // check connection, in case connect
2558         if (!Connect(system))
2559         {
2560                 Log(detector, Form("GetFileSources - Couldn't connect to %s FXS database", GetSystemName(system)));
2561                 return NULL;
2562         }
2563
2564         TString sourceName = 0;
2565         if (system == kDAQ)
2566         {
2567                 sourceName = "DAQsource";
2568         } else if (system == kHLT)
2569         {
2570                 sourceName = "DDLnumbers";
2571         }
2572
2573         TString sqlQueryStart = Form("select distinct %s from %s where", sourceName.Data(), fConfig->GetFXSdbTable(system));
2574         TString whereClause = Form("run=%d and detector=\"%s\"",
2575                                 GetCurrentRun(), detector);
2576         if (id)
2577                 whereClause += Form(" and fileId=\"%s\"", id);
2578         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2579
2580         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2581
2582         // Query execution
2583         TSQLResult* aResult;
2584         aResult = fServer[system]->Query(sqlQuery);
2585         if (!aResult) {
2586                 Log(detector, Form("GetFileSources - Can't execute SQL query to %s database for id: %s",
2587                                 GetSystemName(system), id));
2588                 return 0;
2589         }
2590
2591         TList *list = new TList();
2592         list->SetOwner(1);
2593         
2594         if (aResult->GetRowCount() == 0)
2595         {
2596                 Log(detector,
2597                         Form("GetFileSources - No entry in %s FXS table for id: %s", GetSystemName(system), id));
2598                 delete aResult;
2599                 return list;
2600         }
2601
2602         Log(detector, Form("GetFileSources - Found %d sources", aResult->GetRowCount()));
2603
2604         TSQLRow* aRow;
2605         while ((aRow = aResult->Next()))
2606         {
2607
2608                 TString source(aRow->GetField(0), aRow->GetFieldLength(0));
2609                 AliDebug(2, Form("%s = %s", sourceName.Data(), source.Data()));
2610                 list->Add(new TObjString(source));
2611                 delete aRow;
2612         }
2613
2614         delete aResult;
2615
2616         return list;
2617 }
2618
2619 //______________________________________________________________________________________________
2620 TList* AliShuttle::GetFileIDs(Int_t system, const char* detector, const char* source)
2621 {
2622         //
2623         // Get all ids of condition files produced by a given source from file exchange servers
2624         //
2625         
2626         Log(detector, Form("GetFileIDs - Retrieving ids with source %s with %s", source, GetSystemName(system)));
2627
2628         // check if test mode should simulate a FXS error
2629         if (fTestMode & kErrorFXSSources)
2630         {
2631                 Log(detector, Form("GetFileIDs - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2632                 return 0;
2633         }
2634
2635         // check connection, in case connect
2636         if (!Connect(system))
2637         {
2638                 Log(detector, Form("GetFileIDs - Couldn't connect to %s FXS database", GetSystemName(system)));
2639                 return NULL;
2640         }
2641
2642         TString sourceName = 0;
2643         if (system == kDAQ)
2644         {
2645                 sourceName = "DAQsource";
2646         } else if (system == kHLT)
2647         {
2648                 sourceName = "DDLnumbers";
2649         }
2650
2651         TString sqlQueryStart = Form("select fileId from %s where", fConfig->GetFXSdbTable(system));
2652         TString whereClause = Form("run=%d and detector=\"%s\"",
2653                                 GetCurrentRun(), detector);
2654         if (sourceName.Length() > 0 && source)
2655                 whereClause += Form(" and %s=\"%s\"", sourceName.Data(), source);
2656         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2657
2658         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2659
2660         // Query execution
2661         TSQLResult* aResult;
2662         aResult = fServer[system]->Query(sqlQuery);
2663         if (!aResult) {
2664                 Log(detector, Form("GetFileIDs - Can't execute SQL query to %s database for source: %s",
2665                                 GetSystemName(system), source));
2666                 return 0;
2667         }
2668
2669         TList *list = new TList();
2670         list->SetOwner(1);
2671         
2672         if (aResult->GetRowCount() == 0)
2673         {
2674                 Log(detector,
2675                         Form("GetFileIDs - No entry in %s FXS table for source: %s", GetSystemName(system), source));
2676                 delete aResult;
2677                 return list;
2678         }
2679
2680         Log(detector, Form("GetFileIDs - Found %d ids", aResult->GetRowCount()));
2681
2682         TSQLRow* aRow;
2683
2684         while ((aRow = aResult->Next()))
2685         {
2686
2687                 TString id(aRow->GetField(0), aRow->GetFieldLength(0));
2688                 AliDebug(2, Form("fileId = %s", id.Data()));
2689                 list->Add(new TObjString(id));
2690                 delete aRow;
2691         }
2692
2693         delete aResult;
2694
2695         return list;
2696 }
2697
2698 //______________________________________________________________________________________________
2699 Bool_t AliShuttle::Connect(Int_t system)
2700 {
2701         // Connect to MySQL Server of the system's FXS MySQL databases
2702         // DAQ Logbook, Shuttle Logbook and DAQ FXS db are on the same host
2703         //
2704
2705         // check connection: if already connected return
2706         if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
2707
2708         TString dbHost, dbUser, dbPass, dbName;
2709
2710         if (system < 3) // FXS db servers
2711         {
2712                 dbHost = Form("mysql://%s:%d", fConfig->GetFXSdbHost(system), fConfig->GetFXSdbPort(system));
2713                 dbUser = fConfig->GetFXSdbUser(system);
2714                 dbPass = fConfig->GetFXSdbPass(system);
2715                 dbName =   fConfig->GetFXSdbName(system);
2716         } else { // Run & Shuttle logbook servers
2717         // TODO Will the Shuttle logbook server be the same as the Run logbook server ???
2718                 dbHost = Form("mysql://%s:%d", fConfig->GetDAQlbHost(), fConfig->GetDAQlbPort());
2719                 dbUser = fConfig->GetDAQlbUser();
2720                 dbPass = fConfig->GetDAQlbPass();
2721                 dbName =   fConfig->GetDAQlbDB();
2722         }
2723
2724         fServer[system] = TSQLServer::Connect(dbHost.Data(), dbUser.Data(), dbPass.Data());
2725         if (!fServer[system] || !fServer[system]->IsConnected()) {
2726                 if(system < 3)
2727                 {
2728                 AliError(Form("Can't establish connection to FXS database for %s",
2729                                         AliShuttleInterface::GetSystemName(system)));
2730                 } else {
2731                 AliError("Can't establish connection to Run logbook.");
2732                 }
2733                 if(fServer[system]) delete fServer[system];
2734                 return kFALSE;
2735         }
2736
2737         // Get tables
2738         TSQLResult* aResult=0;
2739         switch(system){
2740                 case kDAQ:
2741                         aResult = fServer[kDAQ]->GetTables(dbName.Data());
2742                         break;
2743                 case kDCS:
2744                         aResult = fServer[kDCS]->GetTables(dbName.Data());
2745                         break;
2746                 case kHLT:
2747                         aResult = fServer[kHLT]->GetTables(dbName.Data());
2748                         break;
2749                 default:
2750                         aResult = fServer[3]->GetTables(dbName.Data());
2751                         break;
2752         }
2753
2754         delete aResult;
2755         return kTRUE;
2756 }
2757
2758 //______________________________________________________________________________________________
2759 Bool_t AliShuttle::UpdateTable()
2760 {
2761         //
2762         // Update FXS table filling time_processed field in all rows corresponding to current run and detector
2763         //
2764
2765         Bool_t result = kTRUE;
2766
2767         for (UInt_t system=0; system<3; system++)
2768         {
2769                 if(!fFXSCalled[system]) continue;
2770
2771                 // check connection, in case connect
2772                 if (!Connect(system))
2773                 {
2774                         Log(fCurrentDetector, Form("UpdateTable - Couldn't connect to %s FXS database", GetSystemName(system)));
2775                         result = kFALSE;
2776                         continue;
2777                 }
2778
2779                 TTimeStamp now; // now
2780
2781                 // Loop on FXS list entries
2782                 TIter iter(&fFXSlist[system]);
2783                 TObjString *aFXSentry=0;
2784                 while ((aFXSentry = dynamic_cast<TObjString*> (iter.Next())))
2785                 {
2786                         TString aFXSentrystr = aFXSentry->String();
2787                         TObjArray *aFXSarray = aFXSentrystr.Tokenize("#!?!#");
2788                         if (!aFXSarray || aFXSarray->GetEntries() != 2 )
2789                         {
2790                                 Log(fCurrentDetector, Form("UpdateTable - error updating %s FXS entry. Check string: <%s>",
2791                                         GetSystemName(system), aFXSentrystr.Data()));
2792                                 if(aFXSarray) delete aFXSarray;
2793                                 result = kFALSE;
2794                                 continue;
2795                         }
2796                         const char* fileId = ((TObjString*) aFXSarray->At(0))->GetName();
2797                         const char* source = ((TObjString*) aFXSarray->At(1))->GetName();
2798
2799                         TString whereClause;
2800                         if (system == kDAQ)
2801                         {
2802                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
2803                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
2804                         }
2805                         else if (system == kDCS)
2806                         {
2807                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\";",
2808                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId);
2809                         }
2810                         else if (system == kHLT)
2811                         {
2812                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\";",
2813                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
2814                         }
2815
2816                         delete aFXSarray;
2817
2818                         TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
2819                                                                 now.GetSec(), whereClause.Data());
2820
2821                         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2822
2823                         // Query execution
2824                         TSQLResult* aResult;
2825                         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2826                         if (!aResult)
2827                         {
2828                                 Log(fCurrentDetector, Form("UpdateTable - %s db: can't execute SQL query <%s>",
2829                                                                 GetSystemName(system), sqlQuery.Data()));
2830                                 result = kFALSE;
2831                                 continue;
2832                         }
2833                         delete aResult;
2834                 }
2835         }
2836
2837         return result;
2838 }
2839
2840 //______________________________________________________________________________________________
2841 Bool_t AliShuttle::UpdateTableFailCase()
2842 {
2843         // Update FXS table filling time_processed field in all rows corresponding to current run and detector
2844         // this is called in case the preprocessor is declared failed for the current run, because
2845         // the fields are updated only in case of success
2846
2847         Bool_t result = kTRUE;
2848
2849         for (UInt_t system=0; system<3; system++)
2850         {
2851                 // check connection, in case connect
2852                 if (!Connect(system))
2853                 {
2854                         Log(fCurrentDetector, Form("UpdateTableFailCase - Couldn't connect to %s FXS database",
2855                                                         GetSystemName(system)));
2856                         result = kFALSE;
2857                         continue;
2858                 }
2859
2860                 TTimeStamp now; // now
2861
2862                 // Loop on FXS list entries
2863
2864                 TString whereClause = Form("where run=%d and detector=\"%s\";",
2865                                                 GetCurrentRun(), fCurrentDetector.Data());
2866
2867
2868                 TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
2869                                                         now.GetSec(), whereClause.Data());
2870
2871                 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2872
2873                 // Query execution
2874                 TSQLResult* aResult;
2875                 aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2876                 if (!aResult)
2877                 {
2878                         Log(fCurrentDetector, Form("UpdateTableFailCase - %s db: can't execute SQL query <%s>",
2879                                                         GetSystemName(system), sqlQuery.Data()));
2880                         result = kFALSE;
2881                         continue;
2882                 }
2883                 delete aResult;
2884         }
2885
2886         return result;
2887 }
2888
2889 //______________________________________________________________________________________________
2890 Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
2891 {
2892         //
2893         // Update Shuttle logbook filling detector or shuttle_done column
2894         // ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
2895         //
2896
2897         // check connection, in case connect
2898         if(!Connect(3)){
2899                 Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook.");
2900                 return kFALSE;
2901         }
2902
2903         TString detName(detector);
2904         TString setClause;
2905         if (detName == "shuttle_done" || detName == "shuttle_ignored")
2906         {
2907                 setClause = "set shuttle_done=1";
2908
2909                 if (detName == "shuttle_done")
2910                 {
2911                         // Send the information to ML
2912                         TMonaLisaText  mlStatus("SHUTTLE_status", "Done");
2913
2914                         TList mlList;
2915                         mlList.Add(&mlStatus);
2916                 
2917                         TString mlID;
2918                         mlID.Form("%d", GetCurrentRun());
2919                         fMonaLisa->SendParameters(&mlList, mlID);
2920                 }
2921         } else {
2922                 TString statusStr(status);
2923                 if(statusStr.Contains("done", TString::kIgnoreCase) ||
2924                    statusStr.Contains("failed", TString::kIgnoreCase)){
2925                         setClause = Form("set %s=\"%s\"", detector, status);
2926                 } else {
2927                         Log("SHUTTLE",
2928                                 Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s",
2929                                         status, detector));
2930                         return kFALSE;
2931                 }
2932         }
2933
2934         TString whereClause = Form("where run=%d", GetCurrentRun());
2935
2936         TString sqlQuery = Form("update %s %s %s",
2937                                         fConfig->GetShuttlelbTable(), setClause.Data(), whereClause.Data());
2938
2939         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2940
2941         // Query execution
2942         TSQLResult* aResult;
2943         aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
2944         if (!aResult) {
2945                 Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data()));
2946                 return kFALSE;
2947         }
2948         delete aResult;
2949
2950         return kTRUE;
2951 }
2952
2953 //______________________________________________________________________________________________
2954 Int_t AliShuttle::GetCurrentRun() const
2955 {
2956         //
2957         // Get current run from logbook entry
2958         //
2959
2960         return fLogbookEntry ? fLogbookEntry->GetRun() : -1;
2961 }
2962
2963 //______________________________________________________________________________________________
2964 UInt_t AliShuttle::GetCurrentStartTime() const
2965 {
2966         //
2967         // get current start time
2968         //
2969
2970         return fLogbookEntry ? fLogbookEntry->GetStartTime() : 0;
2971 }
2972
2973 //______________________________________________________________________________________________
2974 UInt_t AliShuttle::GetCurrentEndTime() const
2975 {
2976         //
2977         // get current end time from logbook entry
2978         //
2979
2980         return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
2981 }
2982
2983 //______________________________________________________________________________________________
2984 UInt_t AliShuttle::GetCurrentYear() const
2985 {
2986         //
2987         // Get current year from logbook entry
2988         //
2989
2990         if (!fLogbookEntry) return 0;
2991         
2992         TTimeStamp startTime(GetCurrentStartTime());
2993         TString year =  Form("%d",startTime.GetDate());
2994         year = year(0,4);
2995         
2996         return year.Atoi();
2997 }
2998
2999 //______________________________________________________________________________________________
3000 const char* AliShuttle::GetLHCPeriod() const
3001 {
3002         //
3003         // Get current LHC period from logbook entry
3004         //
3005
3006         if (!fLogbookEntry) return 0;
3007                 
3008         return fLogbookEntry->GetRunParameter("LHCperiod");
3009 }
3010
3011 //______________________________________________________________________________________________
3012 void AliShuttle::Log(const char* detector, const char* message)
3013 {
3014         //
3015         // Fill log string with a message
3016         //
3017
3018         TString logRunDir = GetShuttleLogDir();
3019         if (GetCurrentRun() >=0)
3020                 logRunDir += Form("/%d", GetCurrentRun());
3021         
3022         void* dir = gSystem->OpenDirectory(logRunDir.Data());
3023         if (dir == NULL) {
3024                 if (gSystem->mkdir(logRunDir.Data(), kTRUE)) {
3025                         AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
3026                         return;
3027                 }
3028
3029         } else {
3030                 gSystem->FreeDirectory(dir);
3031         }
3032
3033         TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
3034         if (GetCurrentRun() >= 0) 
3035                 toLog += Form("run %d - ", GetCurrentRun());
3036         toLog += Form("%s", message);
3037
3038         AliInfo(toLog.Data());
3039         
3040         // if we redirect the log output already to the file, leave here
3041         if (fOutputRedirected && strcmp(detector, "SHUTTLE") != 0)
3042                 return;
3043
3044         TString fileName = GetLogFileName(detector);
3045         
3046         gSystem->ExpandPathName(fileName);
3047
3048         ofstream logFile;
3049         logFile.open(fileName, ofstream::out | ofstream::app);
3050
3051         if (!logFile.is_open()) {
3052                 AliError(Form("Could not open file %s", fileName.Data()));
3053                 return;
3054         }
3055
3056         logFile << toLog.Data() << "\n";
3057
3058         logFile.close();
3059 }
3060
3061 //______________________________________________________________________________________________
3062 TString AliShuttle::GetLogFileName(const char* detector) const
3063 {
3064         // 
3065         // returns the name of the log file for a given sub detector
3066         //
3067         
3068         TString fileName;
3069         
3070         if (GetCurrentRun() >= 0) 
3071         {
3072                 fileName.Form("%s/%d/%s_%d.log", GetShuttleLogDir(), GetCurrentRun(), 
3073                         detector, GetCurrentRun());
3074         } else {
3075                 fileName.Form("%s/%s.log", GetShuttleLogDir(), detector);
3076         }
3077
3078         return fileName;
3079 }
3080
3081 //______________________________________________________________________________________________
3082 void AliShuttle::SendAlive()
3083 {
3084         // sends alive message to ML
3085         
3086         TMonaLisaText mlStatus("SHUTTLE_status", "Alive");
3087
3088         TList mlList;
3089         mlList.Add(&mlStatus);
3090
3091         fMonaLisa->SendParameters(&mlList, "__PROCESSINGINFO__");
3092 }
3093
3094 //______________________________________________________________________________________________
3095 Bool_t AliShuttle::Collect(Int_t run)
3096 {
3097         //
3098         // Collects conditions data for all UNPROCESSED run written to DAQ LogBook in case of run = -1 (default)
3099         // If a dedicated run is given this run is processed
3100         //
3101         // In operational mode, this is the Shuttle function triggered by the EOR signal.
3102         //
3103
3104         if (run == -1)
3105                 Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs");
3106         else
3107                 Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run));
3108
3109         SetLastAction("Starting");
3110
3111         // create ML instance
3112         if (!fMonaLisa)
3113                 fMonaLisa = new TMonaLisaWriter(fConfig->GetMonitorHost(), fConfig->GetMonitorTable());
3114                 
3115         SendAlive();
3116         CountOpenRuns();
3117
3118         TString whereClause("where shuttle_done=0");
3119         if (run != -1)
3120                 whereClause += Form(" and run=%d", run);
3121
3122         TObjArray shuttleLogbookEntries;
3123         if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries))
3124         {
3125                 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
3126                 return kFALSE;
3127         }
3128
3129         if (shuttleLogbookEntries.GetEntries() == 0)
3130         {
3131                 if (run == -1)
3132                         Log("SHUTTLE","Collect - Found no UNPROCESSED runs in Shuttle logbook");
3133                 else
3134                         Log("SHUTTLE", Form("Collect - Run %d is already DONE "
3135                                                 "or it does not exist in Shuttle logbook", run));
3136                 return kTRUE;
3137         }
3138
3139         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
3140                 fFirstUnprocessed[iDet] = kTRUE;
3141
3142         if (run != -1)
3143         {
3144                 // query Shuttle logbook for earlier runs, check if some detectors are unprocessed,
3145                 // flag them into fFirstUnprocessed array
3146                 TString whereClause(Form("where shuttle_done=0 and run < %d", run));
3147                 TObjArray tmpLogbookEntries;
3148                 if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries))
3149                 {
3150                         Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
3151                         return kFALSE;
3152                 }
3153
3154                 TIter iter(&tmpLogbookEntries);
3155                 AliShuttleLogbookEntry* anEntry = 0;
3156                 while ((anEntry = dynamic_cast<AliShuttleLogbookEntry*> (iter.Next())))
3157                 {
3158                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
3159                         {
3160                                 if (anEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
3161                                 {
3162                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
3163                                                         anEntry->GetRun(), GetDetName(iDet)));
3164                                         fFirstUnprocessed[iDet] = kFALSE;
3165                                 }
3166                         }
3167
3168                 }
3169
3170         }
3171
3172         if (!RetrieveConditionsData(shuttleLogbookEntries))
3173         {
3174                 Log("SHUTTLE", "Collect - Process of at least one run failed");
3175                 return kFALSE;
3176         }
3177
3178         Log("SHUTTLE", "Collect - Requested run(s) successfully processed");
3179         return kTRUE;
3180 }
3181
3182 //______________________________________________________________________________________________
3183 Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries)
3184 {
3185         //
3186         // Retrieve conditions data for all runs that aren't processed yet
3187         //
3188
3189         Bool_t hasError = kFALSE;
3190
3191         TIter iter(&dateEntries);
3192         AliShuttleLogbookEntry* anEntry;
3193
3194         while ((anEntry = (AliShuttleLogbookEntry*) iter.Next())){
3195                 if (!Process(anEntry)){
3196                         hasError = kTRUE;
3197                 }
3198
3199                 // clean SHUTTLE temp directory
3200                 //TString filename = Form("%s/*.shuttle", GetShuttleTempDir());
3201                 //RemoveFile(filename.Data());
3202         }
3203
3204         return hasError == kFALSE;
3205 }
3206
3207 //______________________________________________________________________________________________
3208 ULong_t AliShuttle::GetTimeOfLastAction() const
3209 {
3210         //
3211         // Gets time of last action
3212         //
3213
3214         ULong_t tmp;
3215
3216         fMonitoringMutex->Lock();
3217
3218         tmp = fLastActionTime;
3219
3220         fMonitoringMutex->UnLock();
3221
3222         return tmp;
3223 }
3224
3225 //______________________________________________________________________________________________
3226 const TString AliShuttle::GetLastAction() const
3227 {
3228         //
3229         // returns a string description of the last action
3230         //
3231
3232         TString tmp;
3233
3234         fMonitoringMutex->Lock();
3235         
3236         tmp = fLastAction;
3237         
3238         fMonitoringMutex->UnLock();
3239
3240         return tmp;
3241 }
3242
3243 //______________________________________________________________________________________________
3244 void AliShuttle::SetLastAction(const char* action)
3245 {
3246         //
3247         // updates the monitoring variables
3248         //
3249
3250         fMonitoringMutex->Lock();
3251
3252         fLastAction = action;
3253         fLastActionTime = time(0);
3254         
3255         fMonitoringMutex->UnLock();
3256 }
3257
3258 //______________________________________________________________________________________________
3259 const char* AliShuttle::GetRunParameter(const char* param)
3260 {
3261         //
3262         // returns run parameter read from DAQ logbook
3263         //
3264
3265         if(!fLogbookEntry) {
3266                 AliError("No logbook entry!");
3267                 return 0;
3268         }
3269
3270         return fLogbookEntry->GetRunParameter(param);
3271 }
3272
3273 //______________________________________________________________________________________________
3274 AliCDBEntry* AliShuttle::GetFromOCDB(const char* detector, const AliCDBPath& path)
3275 {
3276         //
3277         // returns object from OCDB valid for current run
3278         //
3279
3280         if (fTestMode & kErrorOCDB)
3281         {
3282                 Log(detector, "GetFromOCDB - In TESTMODE - Simulating error with OCDB");
3283                 return 0;
3284         }
3285         
3286         AliCDBStorage *sto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
3287         if (!sto)
3288         {
3289                 Log(detector, "GetFromOCDB - Cannot activate main OCDB for query!");
3290                 return 0;
3291         }
3292
3293         return dynamic_cast<AliCDBEntry*> (sto->Get(path, GetCurrentRun()));
3294 }
3295
3296 //______________________________________________________________________________________________
3297 Bool_t AliShuttle::SendMail()
3298 {
3299         //
3300         // sends a mail to the subdetector expert in case of preprocessor error
3301         //
3302         
3303         if (fTestMode != kNone)
3304                 return kTRUE;
3305
3306         TString to="";
3307         TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
3308         TObjString *anExpert=0;
3309         while ((anExpert = (TObjString*) iterExperts.Next()))
3310         {
3311                 to += Form("%s,", anExpert->GetName());
3312         }
3313         if (to.Length() > 0)
3314           to.Remove(to.Length()-1);
3315         AliDebug(2, Form("to: %s",to.Data()));
3316
3317         if (to.IsNull()) {
3318                 Log("SHUTTLE", "List of detector responsibles not yet set!");
3319                 return kFALSE;
3320         }
3321
3322         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
3323         if (dir == NULL)
3324         {
3325                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
3326                 {
3327                         Log("SHUTTLE", Form("SendMail - Can't open directory <%s>", GetShuttleLogDir()));
3328                         return kFALSE;
3329                 }
3330
3331         } else {
3332                 gSystem->FreeDirectory(dir);
3333         }
3334
3335         TString bodyFileName;
3336         bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
3337         gSystem->ExpandPathName(bodyFileName);
3338
3339         ofstream mailBody;
3340         mailBody.open(bodyFileName, ofstream::out);
3341
3342         if (!mailBody.is_open())
3343         {
3344                 Log("SHUTTLE", Form("Could not open mail body file %s", bodyFileName.Data()));
3345                 return kFALSE;
3346         }
3347
3348         TString cc="alberto.colla@cern.ch";
3349
3350         TString subject = Form("%s Shuttle preprocessor FAILED in run %d (run type = %s)!",
3351                                 fCurrentDetector.Data(), GetCurrentRun(), GetRunType());
3352         AliDebug(2, Form("subject: %s", subject.Data()));
3353
3354         TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
3355         body += Form("SHUTTLE just detected that your preprocessor "
3356                         "failed processing run %d (run type = %s)!!\n\n", 
3357                                         GetCurrentRun(), GetRunType());
3358         body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n", 
3359                                 fCurrentDetector.Data());
3360         if (fConfig->GetRunMode() == AliShuttleConfig::kTest)
3361         {
3362                 body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
3363         } else {
3364                 body += Form("\thttp://pcalimonitor.cern.ch/shuttle.jsp?instance=PROD&time=168 \n\n");
3365         }
3366         
3367         
3368         TString logFolder = "logs";
3369         if (fConfig->GetRunMode() == AliShuttleConfig::kProd) 
3370                 logFolder += "_PROD";
3371         
3372         
3373         body += Form("Find the %s log for the current run on \n\n"
3374                 "\thttp://pcalishuttle01.cern.ch:8880/%s/%d/%s_%d.log \n\n", 
3375                 fCurrentDetector.Data(), logFolder.Data(), GetCurrentRun(), 
3376                                 fCurrentDetector.Data(), GetCurrentRun());
3377         body += Form("The last 10 lines of %s log file are following:\n\n", fCurrentDetector.Data());
3378
3379         AliDebug(2, Form("Body begin: %s", body.Data()));
3380
3381         mailBody << body.Data();
3382         mailBody.close();
3383         mailBody.open(bodyFileName, ofstream::out | ofstream::app);
3384
3385         TString logFileName = Form("%s/%d/%s_%d.log", GetShuttleLogDir(), 
3386                 GetCurrentRun(), fCurrentDetector.Data(), GetCurrentRun());
3387         TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
3388         if (gSystem->Exec(tailCommand.Data()))
3389         {
3390                 mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
3391         }
3392
3393         TString endBody = Form("------------------------------------------------------\n\n");
3394         endBody += Form("In case of problems please contact the SHUTTLE core team.\n\n");
3395         endBody += "Please do not answer this message directly, it is automatically generated.\n\n";
3396         endBody += "Greetings,\n\n \t\t\tthe SHUTTLE\n";
3397
3398         AliDebug(2, Form("Body end: %s", endBody.Data()));
3399
3400         mailBody << endBody.Data();
3401
3402         mailBody.close();
3403
3404         // send mail!
3405         TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
3406                                                 subject.Data(),
3407                                                 cc.Data(),
3408                                                 to.Data(),
3409                                                 bodyFileName.Data());
3410         AliDebug(2, Form("mail command: %s", mailCommand.Data()));
3411
3412         Bool_t result = gSystem->Exec(mailCommand.Data());
3413
3414         return result == 0;
3415 }
3416
3417 //______________________________________________________________________________________________
3418 Bool_t AliShuttle::SendMailToDCS()
3419 {
3420         //
3421         // sends a mail to the DCS experts in case of DCS error
3422         //
3423         
3424         if (fTestMode != kNone)
3425                 return kTRUE;
3426
3427         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
3428         if (dir == NULL)
3429         {
3430                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
3431                 {
3432                         Log("SHUTTLE", Form("SendMailToDCS - Can't open directory <%s>", GetShuttleLogDir()));
3433                         return kFALSE;
3434                 }
3435
3436         } else {
3437                 gSystem->FreeDirectory(dir);
3438         }
3439
3440         TString bodyFileName;
3441         bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
3442         gSystem->ExpandPathName(bodyFileName);
3443
3444         ofstream mailBody;
3445         mailBody.open(bodyFileName, ofstream::out);
3446
3447         if (!mailBody.is_open())
3448         {
3449                 Log("SHUTTLE", Form("SendMailToDCS - Could not open mail body file %s", bodyFileName.Data()));
3450                 return kFALSE;
3451         }
3452
3453         TString to="Vladimir.Fekete@cern.ch, Svetozar.Kapusta@cern.ch";
3454         //TString to="alberto.colla@cern.ch";
3455         AliDebug(2, Form("to: %s",to.Data()));
3456
3457         if (to.IsNull()) {
3458                 Log("SHUTTLE", "List of detector responsibles not yet set!");
3459                 return kFALSE;
3460         }
3461
3462         TString cc="alberto.colla@cern.ch";
3463
3464         TString subject = Form("Retrieval of data points for %s FAILED in run %d !",
3465                                 fCurrentDetector.Data(), GetCurrentRun());
3466         AliDebug(2, Form("subject: %s", subject.Data()));
3467
3468         TString body = Form("Dear DCS experts, \n\n");
3469         body += Form("SHUTTLE couldn\'t retrieve the data points for detector %s "
3470                         "in run %d!!\n\n", fCurrentDetector.Data(), GetCurrentRun());
3471         body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n", 
3472                                 fCurrentDetector.Data());
3473         if (fConfig->GetRunMode() == AliShuttleConfig::kTest)
3474         {
3475                 body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
3476         } else {
3477                 body += Form("\thttp://pcalimonitor.cern.ch/shuttle.jsp?instance=PROD?time=168 \n\n");
3478         }
3479
3480         TString logFolder = "logs";
3481         if (fConfig->GetRunMode() == AliShuttleConfig::kProd) 
3482                 logFolder += "_PROD";
3483         
3484         
3485         body += Form("Find the %s log for the current run on \n\n"
3486                 "\thttp://pcalishuttle01.cern.ch:8880/%s/%d/%s_%d.log \n\n", 
3487                 fCurrentDetector.Data(), logFolder.Data(), GetCurrentRun(), 
3488                                 fCurrentDetector.Data(), GetCurrentRun());
3489         body += Form("The last 10 lines of %s log file are following:\n\n", fCurrentDetector.Data());
3490
3491         AliDebug(2, Form("Body begin: %s", body.Data()));
3492
3493         mailBody << body.Data();
3494         mailBody.close();
3495         mailBody.open(bodyFileName, ofstream::out | ofstream::app);
3496
3497         TString logFileName = Form("%s/%d/%s_%d.log", GetShuttleLogDir(), GetCurrentRun(),
3498                 fCurrentDetector.Data(), GetCurrentRun());
3499         TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
3500         if (gSystem->Exec(tailCommand.Data()))
3501         {
3502                 mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
3503         }
3504
3505         TString endBody = Form("------------------------------------------------------\n\n");
3506         endBody += Form("In case of problems please contact the SHUTTLE core team.\n\n");
3507         endBody += "Please do not answer this message directly, it is automatically generated.\n\n";
3508         endBody += "Greetings,\n\n \t\t\tthe SHUTTLE\n";
3509
3510         AliDebug(2, Form("Body end: %s", endBody.Data()));
3511
3512         mailBody << endBody.Data();
3513
3514         mailBody.close();
3515
3516         // send mail!
3517         TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
3518                                                 subject.Data(),
3519                                                 cc.Data(),
3520                                                 to.Data(),
3521                                                 bodyFileName.Data());
3522         AliDebug(2, Form("mail command: %s", mailCommand.Data()));
3523
3524         Bool_t result = gSystem->Exec(mailCommand.Data());
3525
3526         return result == 0;
3527 }
3528
3529 //______________________________________________________________________________________________
3530 const char* AliShuttle::GetRunType()
3531 {
3532         //
3533         // returns run type read from "run type" logbook
3534         //
3535
3536         if(!fLogbookEntry) {
3537                 AliError("No logbook entry!");
3538                 return 0;
3539         }
3540
3541         return fLogbookEntry->GetRunType();
3542 }
3543
3544 //______________________________________________________________________________________________
3545 Bool_t AliShuttle::GetHLTStatus()
3546 {
3547         // Return HLT status (ON=1 OFF=0)
3548         // Converts the HLT status from the status string read in the run logbook (not just a bool)
3549
3550         if(!fLogbookEntry) {
3551                 AliError("No logbook entry!");
3552                 return 0;
3553         }
3554
3555         // TODO implement when HLTStatus is inserted in run logbook
3556         //TString hltStatus = fLogbookEntry->GetRunParameter("HLTStatus");
3557         //if(hltStatus == "OFF") {return kFALSE};
3558
3559         return kTRUE;
3560 }
3561
3562 //______________________________________________________________________________________________
3563 void AliShuttle::SetShuttleTempDir(const char* tmpDir)
3564 {
3565         //
3566         // sets Shuttle temp directory
3567         //
3568
3569         fgkShuttleTempDir = gSystem->ExpandPathName(tmpDir);
3570 }
3571
3572 //______________________________________________________________________________________________
3573 void AliShuttle::SetShuttleLogDir(const char* logDir)
3574 {
3575         //
3576         // sets Shuttle log directory
3577         //
3578
3579         fgkShuttleLogDir = gSystem->ExpandPathName(logDir);
3580 }