17a60660c7144422ec7800ac5b1a0112c80d3b51
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttle.cxx
1 /**************************************************************************
2  * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
3  *                                                                        *
4  * Author: The ALICE Off-line Project.                                    *
5  * Contributors are mentioned in the code where appropriate.              *
6  *                                                                        *
7  * Permission to use, copy, modify and distribute this software and its   *
8  * documentation strictly for non-commercial purposes is hereby granted   *
9  * without fee, provided that the above copyright notice appears in all   *
10  * copies and that both the copyright notice and this permission notice   *
11  * appear in the supporting documentation. The authors make no claims     *
12  * about the suitability of this software for any purpose. It is          *
13  * provided "as is" without express or implied warranty.                  *
14  **************************************************************************/
15
16 /*
17 $Log$
18 Revision 1.66  2007/12/05 10:45:19  jgrosseo
19 changed order of arguments to TMonaLisaWriter
20
21 Revision 1.65  2007/11/26 16:58:37  acolla
22 Monalisa configuration added: host and table name
23
24 Revision 1.64  2007/11/13 16:15:47  acolla
25 DCS map is stored in a file in the temp folder where the detector is processed.
26 If the preprocessor fails, the temp folder is not removed. This will help the debugging of the problem.
27
28 Revision 1.63  2007/11/02 10:53:16  acolla
29 Protection added to AliShuttle::CopyFileLocally
30
31 Revision 1.62  2007/10/31 18:23:13  acolla
32 Furter developement on the Shuttle:
33
34 - Shuttle now connects to the Grid as alidaq. The OCDB and Reference folders
35 are now built from /alice/data, e.g.:
36 /alice/data/2007/LHC07a/OCDB
37
38 the year and LHC period are taken from the Shuttle.
39 Raw metadata files are stored by GRP to:
40 /alice/data/2007/LHC07a/<runNb>/Raw/RunMetadata.root
41
42 - Shuttle sends a mail to DCS experts each time DP retrieval fails.
43
44 Revision 1.61  2007/10/30 20:33:51  acolla
45 Improved managing of temporary folders, which weren't correctly handled.
46 Resolved bug introduced in StoreReferenceFile, which caused SPD preprocessor fail.
47
48 Revision 1.60  2007/10/29 18:06:16  acolla
49
50 New function StoreRunMetadataFile added to preprocessor and Shuttle interface
51 This function can be used by GRP only. It stores raw data tags merged file to the
52 raw data folder (e.g. /alice/data/2008/LHC08a/000099999/Raw).
53
54 KNOWN ISSUES:
55
56 1. Shuttle cannot write to /alice/data/ because it belongs to alidaq. Tag file is stored in /alice/simulation/... for the time being.
57 2. Due to a bug in TAlien::Mkdir, the creation of a folder in recursive mode (-p option) does not work. The problem
58 has been corrected in the root package on the Shuttle machine.
59
60 Revision 1.59  2007/10/05 12:40:55  acolla
61
62 Result error code added to AliDCSClient data members (it was "lost" with the new implementation of TMap* GetAliasValues and GetDPValues).
63
64 Revision 1.58  2007/09/28 15:27:40  acolla
65
66 AliDCSClient "multiSplit" option added in the DCS configuration
67 in AliDCSMessage: variable MAX_BODY_SIZE set to 500000
68
69 Revision 1.57  2007/09/27 16:53:13  acolla
70 Detectors can have more than one AMANDA server. SHUTTLE queries the servers sequentially,
71 merges the dcs aliases/DPs in one TMap and sends it to the preprocessor.
72
73 Revision 1.56  2007/09/14 16:46:14  jgrosseo
74 1) Connect and Close are called before and after each query, so one can
75 keep the same AliDCSClient object.
76 2) The splitting of a query is moved to GetDPValues/GetAliasValues.
77 3) Splitting interval can be specified in constructor
78
79 Revision 1.55  2007/08/06 12:26:40  acolla
80 Function Bool_t GetHLTStatus added to preprocessor. It returns the status of HLT
81 read from the run logbook.
82
83 Revision 1.54  2007/07/12 09:51:25  jgrosseo
84 removed duplicated log message in GetFile
85
86 Revision 1.53  2007/07/12 09:26:28  jgrosseo
87 updating hlt fxs base path
88
89 Revision 1.52  2007/07/12 08:06:45  jgrosseo
90 adding log messages in getfile... functions
91 adding not implemented copy constructor in alishuttleconfigholder
92
93 Revision 1.51  2007/07/03 17:24:52  acolla
94 root moved to v5-16-00. TFileMerger->Cp moved to TFile::Cp.
95
96 Revision 1.50  2007/07/02 17:19:32  acolla
97 preprocessor is run in a temp directory that is removed when process is finished.
98
99 Revision 1.49  2007/06/29 10:45:06  acolla
100 Number of columns in MySql Shuttle logbook increased by one (HLT added)
101
102 Revision 1.48  2007/06/21 13:06:19  acolla
103 GetFileSources returns dummy list with 1 source if system=DCS (better than
104 returning error as it was)
105
106 Revision 1.47  2007/06/19 17:28:56  acolla
107 HLT updated; missing map bug removed.
108
109 Revision 1.46  2007/06/09 13:01:09  jgrosseo
110 Switching to retrieval of several DCS DPs at a time (multiDPrequest)
111
112 Revision 1.45  2007/05/30 06:35:20  jgrosseo
113 Adding functionality to the Shuttle/TestShuttle:
114 o) Function to retrieve list of sources from a given system (GetFileSources with id=0)
115 o) Function to retrieve list of IDs for a given source      (GetFileIDs)
116 These functions are needed for dealing with the tag files that are saved for the GRP preprocessor
117 Example code has been added to the TestProcessor in TestShuttle
118
119 Revision 1.44  2007/05/11 16:09:32  acolla
120 Reference files for ITS, MUON and PHOS are now stored in OfflineDetName/OnlineDetName/run_...
121 example: ITS/SPD/100_filename.root
122
123 Revision 1.43  2007/05/10 09:59:51  acolla
124 Various bug fixes in StoreRefFilesToGrid; Cleaning of reference storage before processing detector (CleanReferenceStorage)
125
126 Revision 1.42  2007/05/03 08:01:39  jgrosseo
127 typo in last commit :-(
128
129 Revision 1.41  2007/05/03 08:00:48  jgrosseo
130 fixing log message when pp want to skip dcs value retrieval
131
132 Revision 1.40  2007/04/27 07:06:48  jgrosseo
133 GetFileSources returns empty list in case of no files, but successful query
134 No mails sent in testmode
135
136 Revision 1.39  2007/04/17 12:43:57  acolla
137 Correction in StoreOCDB; change of text in mail to detector expert
138
139 Revision 1.38  2007/04/12 08:26:18  jgrosseo
140 updated comment
141
142 Revision 1.37  2007/04/10 16:53:14  jgrosseo
143 redirecting sub detector stdout, stderr to sub detector log file
144
145 Revision 1.35  2007/04/04 16:26:38  acolla
146 1. Re-organization of function calls in TestPreprocessor to make it more meaningful.
147 2. Added missing dependency in test preprocessors.
148 3. in AliShuttle.cxx: processing time and memory consumption info on a single line.
149
150 Revision 1.34  2007/04/04 10:33:36  jgrosseo
151 1) Storing of files to the Grid is now done _after_ your preprocessors succeeded. This is transparent, which means that you can still use the same functions (Store, StoreReferenceData) to store files to the Grid. However, the Shuttle first stores them locally and transfers them after the preprocessor finished. The return code of these two functions has changed from UInt_t to Bool_t which gives you the success of the storing.
152 In case of an error with the Grid, the Shuttle will retry the storing later, the preprocessor does not need to be run again.
153
154 2) The meaning of the return code of the preprocessor has changed. 0 is now success and any other value means failure. This value is stored in the log and you can use it to keep details about the error condition.
155
156 3) New function StoreReferenceFile to _directly_ store a file (without opening it) to the reference storage.
157
158 4) The memory usage of the preprocessor is monitored. If it exceeds 2 GB it is terminated.
159
160 5) New function AliPreprocessor::ProcessDCS(). If you do not need to have DCS data in all cases, you can skip the processing by implemting this function and returning kFALSE under certain conditions. E.g. if there is a certain run type.
161 If you always need DCS data (like before), you do not need to implement it.
162
163 6) The run type has been added to the monitoring page
164
165 Revision 1.33  2007/04/03 13:56:01  acolla
166 Grid Storage at the end of preprocessing. Added virtual method to disable DCS query according to the
167 run type.
168
169 Revision 1.32  2007/02/28 10:41:56  acolla
170 Run type field added in SHUTTLE framework. Run type is read from "run type" logbook and retrieved by
171 AliPreprocessor::GetRunType() function.
172 Added some ldap definition files.
173
174 Revision 1.30  2007/02/13 11:23:21  acolla
175 Moved getters and setters of Shuttle's main OCDB/Reference, local
176 OCDB/Reference, temp and log folders to AliShuttleInterface
177
178 Revision 1.27  2007/01/30 17:52:42  jgrosseo
179 adding monalisa monitoring
180
181 Revision 1.26  2007/01/23 19:20:03  acolla
182 Removed old ldif files, added TOF, MCH ldif files. Added some options in
183 AliShuttleConfig::Print. Added in Ali Shuttle: SetShuttleTempDir and
184 SetShuttleLogDir
185
186 Revision 1.25  2007/01/15 19:13:52  acolla
187 Moved some AliInfo to AliDebug in SendMail function
188
189 Revision 1.21  2006/12/07 08:51:26  jgrosseo
190 update (alberto):
191 table, db names in ldap configuration
192 added GRP preprocessor
193 DCS data can also be retrieved by data point
194
195 Revision 1.20  2006/11/16 16:16:48  jgrosseo
196 introducing strict run ordering flag
197 removed giving preprocessor name to preprocessor, they have to know their name themselves ;-)
198
199 Revision 1.19  2006/11/06 14:23:04  jgrosseo
200 major update (Alberto)
201 o) reading of run parameters from the logbook
202 o) online offline naming conversion
203 o) standalone DCSclient package
204
205 Revision 1.18  2006/10/20 15:22:59  jgrosseo
206 o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
207 o) Merging Collect, CollectAll, CollectNew function
208 o) Removing implementation of empty copy constructors (declaration still there!)
209
210 Revision 1.17  2006/10/05 16:20:55  jgrosseo
211 adapting to new CDB classes
212
213 Revision 1.16  2006/10/05 15:46:26  jgrosseo
214 applying to the new interface
215
216 Revision 1.15  2006/10/02 16:38:39  jgrosseo
217 update (alberto):
218 fixed memory leaks
219 storing of objects that failed to be stored to the grid before
220 interfacing of shuttle status table in daq system
221
222 Revision 1.14  2006/08/29 09:16:05  jgrosseo
223 small update
224
225 Revision 1.13  2006/08/15 10:50:00  jgrosseo
226 effc++ corrections (alberto)
227
228 Revision 1.12  2006/08/08 14:19:29  jgrosseo
229 Update to shuttle classes (Alberto)
230
231 - Possibility to set the full object's path in the Preprocessor's and
232 Shuttle's  Store functions
233 - Possibility to extend the object's run validity in the same classes
234 ("startValidity" and "validityInfinite" parameters)
235 - Implementation of the StoreReferenceData function to store reference
236 data in a dedicated CDB storage.
237
238 Revision 1.11  2006/07/21 07:37:20  jgrosseo
239 last run is stored after each run
240
241 Revision 1.10  2006/07/20 09:54:40  jgrosseo
242 introducing status management: The processing per subdetector is divided into several steps,
243 after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
244 can keep track of the number of failures and skips further processing after a certain threshold is
245 exceeded. These thresholds can be configured in LDAP.
246
247 Revision 1.9  2006/07/19 10:09:55  jgrosseo
248 new configuration, accesst to DAQ FES (Alberto)
249
250 Revision 1.8  2006/07/11 12:44:36  jgrosseo
251 adding parameters for extended validity range of data produced by preprocessor
252
253 Revision 1.7  2006/07/10 14:37:09  jgrosseo
254 small fix + todo comment
255
256 Revision 1.6  2006/07/10 13:01:41  jgrosseo
257 enhanced storing of last sucessfully processed run (alberto)
258
259 Revision 1.5  2006/07/04 14:59:57  jgrosseo
260 revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
261
262 Revision 1.4  2006/06/12 09:11:16  jgrosseo
263 coding conventions (Alberto)
264
265 Revision 1.3  2006/06/06 14:26:40  jgrosseo
266 o) removed files that were moved to STEER
267 o) shuttle updated to follow the new interface (Alberto)
268
269 Revision 1.2  2006/03/07 07:52:34  hristov
270 New version (B.Yordanov)
271
272 Revision 1.6  2005/11/19 17:19:14  byordano
273 RetrieveDATEEntries and RetrieveConditionsData added
274
275 Revision 1.5  2005/11/19 11:09:27  byordano
276 AliShuttle declaration added
277
278 Revision 1.4  2005/11/17 17:47:34  byordano
279 TList changed to TObjArray
280
281 Revision 1.3  2005/11/17 14:43:23  byordano
282 import to local CVS
283
284 Revision 1.1.1.1  2005/10/28 07:33:58  hristov
285 Initial import as subdirectory in AliRoot
286
287 Revision 1.2  2005/09/13 08:41:15  byordano
288 default startTime endTime added
289
290 Revision 1.4  2005/08/30 09:13:02  byordano
291 some docs added
292
293 Revision 1.3  2005/08/29 21:15:47  byordano
294 some docs added
295
296 */
297
298 //
299 // This class is the main manager for AliShuttle.
300 // It organizes the data retrieval from DCS and calls the
301 // interface methods of AliPreprocessor.
302 // For every detector in AliShuttleConfig (see AliShuttleConfig),
303 // data for its set of aliases is retrieved. If there is a registered
304 // AliPreprocessor for this detector then it will be used
305 // according to the schema (see AliPreprocessor).
306 // If there isn't a registered AliPreprocessor then the retrieved
307 // data is stored automatically to the underlying AliCDBStorage.
308 // For detSpec the alias name is used.
309 //
310
311 #include "AliShuttle.h"
312
313 #include "AliCDBManager.h"
314 #include "AliCDBStorage.h"
315 #include "AliCDBId.h"
316 #include "AliCDBRunRange.h"
317 #include "AliCDBPath.h"
318 #include "AliCDBEntry.h"
319 #include "AliShuttleConfig.h"
320 #include "DCSClient/AliDCSClient.h"
321 #include "AliLog.h"
322 #include "AliPreprocessor.h"
323 #include "AliShuttleStatus.h"
324 #include "AliShuttleLogbookEntry.h"
325
326 #include <TSystem.h>
327 #include <TObject.h>
328 #include <TString.h>
329 #include <TTimeStamp.h>
330 #include <TObjString.h>
331 #include <TSQLServer.h>
332 #include <TSQLResult.h>
333 #include <TSQLRow.h>
334 #include <TMutex.h>
335 #include <TSystemDirectory.h>
336 #include <TSystemFile.h>
337 #include <TFile.h>
338 #include <TGrid.h>
339 #include <TGridResult.h>
340
341 #include <TMonaLisaWriter.h>
342
343 #include <fstream>
344
345 #include <sys/types.h>
346 #include <sys/wait.h>
347
348 ClassImp(AliShuttle)
349
350 //______________________________________________________________________________________________
351 AliShuttle::AliShuttle(const AliShuttleConfig* config,
352                 UInt_t timeout, Int_t retries):
353 fConfig(config),
354 fTimeout(timeout), fRetries(retries),
355 fPreprocessorMap(),
356 fLogbookEntry(0),
357 fCurrentDetector(),
358 fStatusEntry(0),
359 fMonitoringMutex(0),
360 fLastActionTime(0),
361 fLastAction(),
362 fMonaLisa(0),
363 fTestMode(kNone),
364 fReadTestMode(kFALSE),
365 fOutputRedirected(kFALSE)
366 {
367         //
368         // config: AliShuttleConfig used
369         // timeout: timeout used for AliDCSClient connection
370         // retries: the number of retries in case of connection error.
371         //
372
373         if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
374         for(int iSys=0;iSys<4;iSys++) {
375                 fServer[iSys]=0;
376                 if (iSys < 3)
377                         fFXSlist[iSys].SetOwner(kTRUE);
378         }
379         fPreprocessorMap.SetOwner(kTRUE);
380
381         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
382                 fFirstUnprocessed[iDet] = kFALSE;
383
384         fMonitoringMutex = new TMutex();
385 }
386
387 //______________________________________________________________________________________________
388 AliShuttle::~AliShuttle()
389 {
390         //
391         // destructor
392         //
393
394         fPreprocessorMap.DeleteAll();
395         for(int iSys=0;iSys<4;iSys++)
396                 if(fServer[iSys]) {
397                         fServer[iSys]->Close();
398                         delete fServer[iSys];
399                         fServer[iSys] = 0;
400                 }
401
402         if (fStatusEntry){
403                 delete fStatusEntry;
404                 fStatusEntry = 0;
405         }
406         
407         if (fMonitoringMutex) 
408         {
409                 delete fMonitoringMutex;
410                 fMonitoringMutex = 0;
411         }
412 }
413
414 //______________________________________________________________________________________________
415 void AliShuttle::RegisterPreprocessor(AliPreprocessor* preprocessor)
416 {
417         //
418         // Registers new AliPreprocessor.
419         // It uses GetName() for indentificator of the pre processor.
420         // The pre processor is registered it there isn't any other
421         // with the same identificator (GetName()).
422         //
423
424         const char* detName = preprocessor->GetName();
425         if(GetDetPos(detName) < 0)
426                 AliFatal(Form("********** !!!!! Invalid detector name: %s !!!!! **********", detName));
427
428         if (fPreprocessorMap.GetValue(detName)) {
429                 AliWarning(Form("AliPreprocessor %s is already registered!", detName));
430                 return;
431         }
432
433         fPreprocessorMap.Add(new TObjString(detName), preprocessor);
434 }
435 //______________________________________________________________________________________________
436 Bool_t AliShuttle::Store(const AliCDBPath& path, TObject* object,
437                 AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite)
438 {
439         // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for
440         // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored
441         // using this function. Use StoreReferenceData instead!
442         // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
443         // finishes the data are transferred to the main storage (Grid).
444
445         return StoreLocally(fgkLocalCDB, path, object, metaData, validityStart, validityInfinite);
446 }
447
448 //______________________________________________________________________________________________
449 Bool_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData)
450 {
451         // Stores a CDB object in the storage for reference data. This objects will not be available during
452         // offline reconstrunction. Use this function for reference data only!
453         // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
454         // finishes the data are transferred to the main storage (Grid).
455
456         return StoreLocally(fgkLocalRefStorage, path, object, metaData);
457 }
458
459 //______________________________________________________________________________________________
460 Bool_t AliShuttle::StoreLocally(const TString& localUri,
461                         const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
462                         Int_t validityStart, Bool_t validityInfinite)
463 {
464         // Store object temporarily in local storage. Parameters are passed by Store and StoreReferenceData functions.
465         // when the preprocessor finishes the data are transferred to the main storage (Grid).
466         // The parameters are:
467         //   1) Uri of the backup storage (Local)
468         //   2) the object's path.
469         //   3) the object to be stored
470         //   4) the metaData to be associated with the object
471         //   5) the validity start run number w.r.t. the current run,
472         //      if the data is valid only for this run leave the default 0
473         //   6) specifies if the calibration data is valid for infinity (this means until updated),
474         //      typical for calibration runs, the default is kFALSE
475         //
476         // returns 0 if fail, 1 otherwise
477
478         if (fTestMode & kErrorStorage)
479         {
480                 Log(fCurrentDetector, "StoreLocally - In TESTMODE - Simulating error while storing locally");
481                 return kFALSE;
482         }
483         
484         const char* cdbType = (localUri == fgkLocalCDB) ? "CDB" : "Reference";
485
486         Int_t firstRun = GetCurrentRun() - validityStart;
487         if(firstRun < 0) {
488                 AliWarning("First valid run happens to be less than 0! Setting it to 0.");
489                 firstRun=0;
490         }
491
492         Int_t lastRun = -1;
493         if(validityInfinite) {
494                 lastRun = AliCDBRunRange::Infinity();
495         } else {
496                 lastRun = GetCurrentRun();
497         }
498
499         // Version is set to current run, it will be used later to transfer data to Grid
500         AliCDBId id(path, firstRun, lastRun, GetCurrentRun(), -1);
501
502         if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){
503                 TObjString runUsed = Form("%d", GetCurrentRun());
504                 metaData->SetProperty("RunUsed(TObjString)", runUsed.Clone());
505         }
506
507         Bool_t result = kFALSE;
508
509         if (!(AliCDBManager::Instance()->GetStorage(localUri))) {
510                 Log("SHUTTLE", Form("StoreLocally - Cannot activate local %s storage", cdbType));
511         } else {
512                 result = AliCDBManager::Instance()->GetStorage(localUri)
513                                         ->Put(object, id, metaData);
514         }
515
516         if(!result) {
517
518                 Log(fCurrentDetector, Form("StoreLocally - Can't store object <%s>!", id.ToString().Data()));
519         }
520
521         return result;
522 }
523
524 //______________________________________________________________________________________________
525 Bool_t AliShuttle::StoreOCDB()
526 {
527         //
528         // Called when preprocessor ends successfully or when previous storage attempt failed (kStoreError status)
529         // Calls underlying StoreOCDB(const char*) function twice, for OCDB and Reference storage.
530         // Then calls StoreRefFilesToGrid to store reference files. 
531         //
532         
533         if (fTestMode & kErrorGrid)
534         {
535                 Log("SHUTTLE", "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
536                 Log(fCurrentDetector, "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
537                 return kFALSE;
538         }
539         
540         Log("SHUTTLE","StoreOCDB - Storing OCDB data ...");
541         Bool_t resultCDB = StoreOCDB(fgkMainCDB);
542
543         Log("SHUTTLE","StoreOCDB - Storing reference data ...");
544         Bool_t resultRef = StoreOCDB(fgkMainRefStorage);
545         
546         Log("SHUTTLE","StoreOCDB - Storing reference files ...");
547         Bool_t resultRefFiles = CopyFilesToGrid("reference");
548         
549         Bool_t resultMetadata = kTRUE;
550         if(fCurrentDetector == "GRP") 
551         {
552                 Log("StoreOCDB - SHUTTLE","Storing Run Metadata file ...");
553                 resultMetadata = CopyFilesToGrid("metadata");
554         }
555         
556         return resultCDB && resultRef && resultRefFiles && resultMetadata;
557 }
558
559 //______________________________________________________________________________________________
560 Bool_t AliShuttle::StoreOCDB(const TString& gridURI)
561 {
562         //
563         // Called by StoreOCDB(), performs actual storage to the main OCDB and reference storages (Grid)
564         //
565
566         TObjArray* gridIds=0;
567
568         Bool_t result = kTRUE;
569
570         const char* type = 0;
571         TString localURI;
572         if(gridURI == fgkMainCDB) {
573                 type = "OCDB";
574                 localURI = fgkLocalCDB;
575         } else if(gridURI == fgkMainRefStorage) {
576                 type = "reference";
577                 localURI = fgkLocalRefStorage;
578         } else {
579                 AliError(Form("Invalid storage URI: %s", gridURI.Data()));
580                 return kFALSE;
581         }
582
583         AliCDBManager* man = AliCDBManager::Instance();
584
585         AliCDBStorage *gridSto = man->GetStorage(gridURI);
586         if(!gridSto) {
587                 Log("SHUTTLE",
588                         Form("StoreOCDB - cannot activate main %s storage", type));
589                 return kFALSE;
590         }
591
592         gridIds = gridSto->GetQueryCDBList();
593
594         // get objects previously stored in local CDB
595         AliCDBStorage *localSto = man->GetStorage(localURI);
596         if(!localSto) {
597                 Log("SHUTTLE",
598                         Form("StoreOCDB - cannot activate local %s storage", type));
599                 return kFALSE;
600         }
601         AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*");
602         // Local objects were stored with current run as Grid version!
603         TList* localEntries = localSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun());
604         localEntries->SetOwner(1);
605
606         // loop on local stored objects
607         TIter localIter(localEntries);
608         AliCDBEntry *aLocEntry = 0;
609         while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){
610                 aLocEntry->SetOwner(1);
611                 AliCDBId aLocId = aLocEntry->GetId();
612                 aLocEntry->SetVersion(-1);
613                 aLocEntry->SetSubVersion(-1);
614
615                 // If local object is valid up to infinity we store it only if it is
616                 // the first unprocessed run!
617                 if (aLocId.GetLastRun() == AliCDBRunRange::Infinity() &&
618                         !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
619                 {
620                         Log("SHUTTLE", Form("StoreOCDB - %s: object %s has validity infinite but "
621                                                 "there are previous unprocessed runs!",
622                                                 fCurrentDetector.Data(), aLocId.GetPath().Data()));
623                         continue;
624                 }
625
626                 // loop on Grid valid Id's
627                 Bool_t store = kTRUE;
628                 TIter gridIter(gridIds);
629                 AliCDBId* aGridId = 0;
630                 while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
631                         if(aGridId->GetPath() != aLocId.GetPath()) continue;
632                         // skip all objects valid up to infinity
633                         if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
634                         // if we get here, it means there's already some more recent object stored on Grid!
635                         store = kFALSE;
636                         break;
637                 }
638
639                 // If we get here, the file can be stored!
640                 Bool_t storeOk = gridSto->Put(aLocEntry);
641                 if(!store || storeOk){
642
643                         if (!store)
644                         {
645                                 Log(fCurrentDetector.Data(),
646                                         Form("StoreOCDB - A more recent object already exists in %s storage: <%s>",
647                                                 type, aGridId->ToString().Data()));
648                         } else {
649                                 Log("SHUTTLE",
650                                         Form("StoreOCDB - Object <%s> successfully put into %s storage",
651                                                 aLocId.ToString().Data(), type));
652                                 Log(fCurrentDetector.Data(),
653                                         Form("StoreOCDB - Object <%s> successfully put into %s storage",
654                                                 aLocId.ToString().Data(), type));
655                         }
656
657                         // removing local filename...
658                         TString filename;
659                         localSto->IdToFilename(aLocId, filename);
660                         Log("SHUTTLE", Form("StoreOCDB - Removing local file %s", filename.Data()));
661                         RemoveFile(filename.Data());
662                         continue;
663                 } else  {
664                         Log("SHUTTLE",
665                                 Form("StoreOCDB - Grid %s storage of object <%s> failed",
666                                         type, aLocId.ToString().Data()));
667                         Log(fCurrentDetector.Data(),
668                                 Form("StoreOCDB - Grid %s storage of object <%s> failed",
669                                         type, aLocId.ToString().Data()));
670                         result = kFALSE;
671                 }
672         }
673         localEntries->Clear();
674
675         return result;
676 }
677
678 //______________________________________________________________________________________________
679 Bool_t AliShuttle::CleanReferenceStorage(const char* detector)
680 {
681         // clears the directory used to store reference files of a given subdetector
682   
683         AliCDBManager* man = AliCDBManager::Instance();
684         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
685         TString localBaseFolder = sto->GetBaseFolder();
686
687         TString targetDir = GetRefFilePrefix(localBaseFolder.Data(), detector);
688         
689         Log("SHUTTLE", Form("CleanReferenceStorage - Cleaning %s", targetDir.Data()));
690
691         TString begin;
692         begin.Form("%d_", GetCurrentRun());
693         
694         TSystemDirectory* baseDir = new TSystemDirectory("/", targetDir);
695         if (!baseDir)
696                 return kTRUE;
697                 
698         TList* dirList = baseDir->GetListOfFiles();
699         delete baseDir;
700         
701         if (!dirList) return kTRUE;
702                         
703         if (dirList->GetEntries() < 3) 
704         {
705                 delete dirList;
706                 return kTRUE;
707         }
708                                 
709         Int_t nDirs = 0, nDel = 0;
710         TIter dirIter(dirList);
711         TSystemFile* entry = 0;
712
713         Bool_t success = kTRUE;
714         
715         while ((entry = dynamic_cast<TSystemFile*> (dirIter.Next())))
716         {                                       
717                 if (entry->IsDirectory())
718                         continue;
719                 
720                 TString fileName(entry->GetName());
721                 if (!fileName.BeginsWith(begin))
722                         continue;
723                         
724                 nDirs++;
725                                                 
726                 // delete file
727                 Int_t result = gSystem->Unlink(fileName.Data());
728                 
729                 if (result)
730                 {
731                         Log("SHUTTLE", Form("CleanReferenceStorage - Could not delete file %s!", fileName.Data()));
732                         success = kFALSE;
733                 } else {
734                         nDel++;
735                 }
736         }
737
738         if(nDirs > 0)
739                 Log("SHUTTLE", Form("CleanReferenceStorage - %d (over %d) reference files in folder %s were deleted.", 
740                         nDel, nDirs, targetDir.Data()));
741
742                 
743         delete dirList;
744         return success;
745
746
747
748
749
750
751   Int_t result = gSystem->GetPathInfo(targetDir, 0, (Long64_t*) 0, 0, 0);
752   if (result == 0)
753   {
754     // delete directory
755     result = gSystem->Exec(Form("rm -rf %s", targetDir.Data()));
756     if (result != 0)
757     {  
758       Log("SHUTTLE", Form("CleanReferenceStorage - Could not clean directory %s", targetDir.Data()));
759       return kFALSE;
760     }
761   }
762
763   result = gSystem->mkdir(targetDir, kTRUE);
764   if (result != 0)
765   {
766     Log("SHUTTLE", Form("CleanReferenceStorage - Error creating base directory %s", targetDir.Data()));
767     return kFALSE;
768   }
769         
770   return kTRUE;
771 }
772
773 //______________________________________________________________________________________________
774 Bool_t AliShuttle::StoreReferenceFile(const char* detector, const char* localFile, const char* gridFileName)
775 {
776         //
777         // Stores reference file directly (without opening it). This function stores the file locally.
778         //
779         // The file is stored under the following location: 
780         // <base folder of local reference storage>/<DET>/<RUN#>_<gridFileName>
781         // where <gridFileName> is the second parameter given to the function
782         // 
783         
784         if (fTestMode & kErrorStorage)
785         {
786                 Log(fCurrentDetector, "StoreReferenceFile - In TESTMODE - Simulating error while storing locally");
787                 return kFALSE;
788         }
789         
790         AliCDBManager* man = AliCDBManager::Instance();
791         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
792         
793         TString localBaseFolder = sto->GetBaseFolder();
794         
795         TString target = GetRefFilePrefix(localBaseFolder.Data(), detector);    
796         target.Append(Form("/%d_%s", GetCurrentRun(), gridFileName));
797         
798         return CopyFileLocally(localFile, target);
799 }
800
801 //______________________________________________________________________________________________
802 Bool_t AliShuttle::StoreRunMetadataFile(const char* localFile, const char* gridFileName)
803 {
804         //
805         // Stores Run metadata file to the Grid, in the run folder
806         //
807         // Only GRP can call this function.
808         
809         if (fTestMode & kErrorStorage)
810         {
811                 Log(fCurrentDetector, "StoreRunMetaDataFile - In TESTMODE - Simulating error while storing locally");
812                 return kFALSE;
813         }
814         
815         AliCDBManager* man = AliCDBManager::Instance();
816         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
817         
818         TString localBaseFolder = sto->GetBaseFolder();
819         
820         // Build Run level folder
821         // folder = /alice/data/year/lhcPeriod/runNb/Raw
822         
823                 
824         TString lhcPeriod = GetLHCPeriod();     
825         if (lhcPeriod.Length() == 0) 
826         {
827                 Log("SHUTTLE","StoreRunMetaDataFile - LHCPeriod not found in logbook!");
828                 return 0;
829         }
830         
831         TString target = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/Raw/%s", 
832                                 localBaseFolder.Data(), GetCurrentYear(), 
833                                 lhcPeriod.Data(), GetCurrentRun(), gridFileName);
834                                         
835         return CopyFileLocally(localFile, target);
836 }
837
838 //______________________________________________________________________________________________
839 Bool_t AliShuttle::CopyFileLocally(const char* localFile, const TString& target)
840 {
841         //
842         // Stores file locally. Called by StoreReferenceFile and StoreRunMetadataFile
843         // Files are temporarily stored in the local reference storage. When the preprocessor 
844         // finishes, the Shuttle calls CopyFilesToGrid to transfer the files to AliEn 
845         // (in reference or run level folders)
846         //
847         
848         TString targetDir(target(0, target.Last('/')));
849         
850         //try to open base dir folder, if it does not exist
851         void* dir = gSystem->OpenDirectory(targetDir.Data());
852         if (dir == NULL) {
853                 if (gSystem->mkdir(targetDir.Data(), kTRUE)) {
854                         Log("SHUTTLE", Form("StoreFileLocally - Can't open directory <%s>", targetDir.Data()));
855                         return kFALSE;
856                 }
857
858         } else {
859                 gSystem->FreeDirectory(dir);
860         }
861         
862         Int_t result = 0;
863         
864         result = gSystem->GetPathInfo(localFile, 0, (Long64_t*) 0, 0, 0);
865         if (result)
866         {
867                 Log("SHUTTLE", Form("StoreFileLocally - %s does not exist", localFile));
868                 return kFALSE;
869         }
870
871         result = gSystem->GetPathInfo(target, 0, (Long64_t*) 0, 0, 0);
872         if (!result)
873         {
874                 Log("SHUTTLE", Form("StoreFileLocally - target file %s already exist, removing...", target.Data()));
875                 if (gSystem->Unlink(target.Data()))
876                 {
877                         Log("SHUTTLE", Form("StoreFileLocally - Could not remove existing target file %s!", target.Data()));
878                         return kFALSE;
879                 }
880         }       
881         
882         result = gSystem->CopyFile(localFile, target);
883
884         if (result == 0)
885         {
886                 Log("SHUTTLE", Form("StoreFileLocally - File %s stored locally to %s", localFile, target.Data()));
887                 return kTRUE;
888         }
889         else
890         {
891                 Log("SHUTTLE", Form("StoreFileLocally - Could not store file %s to %s! Error code = %d", 
892                                 localFile, target.Data(), result));
893                 return kFALSE;
894         }       
895
896
897
898 }
899
900 //______________________________________________________________________________________________
901 Bool_t AliShuttle::CopyFilesToGrid(const char* type)
902 {
903         //
904         // Transfers local files to the Grid. Local files can be reference files 
905         // or run metadata file (from GRP only).
906         //
907         // According to the type (ref, metadata) the files are stored under the following location: 
908         // ref --> <base folder of reference storage>/<DET>/<RUN#>_<gridFileName>
909         // metadata --> <run data folder>/<MetadataFileName>
910         //
911                 
912         AliCDBManager* man = AliCDBManager::Instance();
913         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
914         if (!sto)
915                 return kFALSE;
916         TString localBaseFolder = sto->GetBaseFolder();
917         
918         TString dir;
919         TString alienDir;
920         TString begin;
921         
922         if (strcmp(type, "reference") == 0) 
923         {
924                 dir = GetRefFilePrefix(localBaseFolder.Data(), fCurrentDetector.Data());
925                 AliCDBStorage* gridSto = man->GetStorage(fgkMainRefStorage);
926                 if (!gridSto)
927                         return kFALSE;
928                 TString gridBaseFolder = gridSto->GetBaseFolder();
929                 alienDir = GetRefFilePrefix(gridBaseFolder.Data(), fCurrentDetector.Data());
930                 begin = Form("%d_", GetCurrentRun());
931         } 
932         else if (strcmp(type, "metadata") == 0)
933         {
934                         
935                 TString lhcPeriod = GetLHCPeriod();
936         
937                 if (lhcPeriod.Length() == 0) 
938                 {
939                         Log("SHUTTLE","CopyFilesToGrid - LHCPeriod not found in logbook!");
940                         return 0;
941                 }
942                 
943                 dir = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/Raw", 
944                                 localBaseFolder.Data(), GetCurrentYear(), 
945                                 lhcPeriod.Data(), GetCurrentRun());
946                 alienDir = dir(dir.Index("/alice/data/"), dir.Length());
947                 
948                 begin = "";
949         }
950         else 
951         {
952                 Log("SHUTTLE", "CopyFilesToGrid - Unexpected: type label must be reference or metadata!");
953                 return kFALSE;
954         }
955                 
956         TSystemDirectory* baseDir = new TSystemDirectory("/", dir);
957         if (!baseDir)
958                 return kTRUE;
959                 
960         TList* dirList = baseDir->GetListOfFiles();
961         delete baseDir;
962         
963         if (!dirList) return kTRUE;
964                 
965         if (dirList->GetEntries() < 3) 
966         {
967                 delete dirList;
968                 return kTRUE;
969         }
970                         
971         if (!gGrid)
972         { 
973                 Log("SHUTTLE", "CopyFilesToGrid - Connection to Grid failed: Cannot continue!");
974                 delete dirList;
975                 return kFALSE;
976         }
977         
978         Int_t nDirs = 0, nTransfer = 0;
979         TIter dirIter(dirList);
980         TSystemFile* entry = 0;
981
982         Bool_t success = kTRUE;
983         Bool_t first = kTRUE;
984         
985         while ((entry = dynamic_cast<TSystemFile*> (dirIter.Next())))
986         {                       
987                 if (entry->IsDirectory())
988                         continue;
989                         
990                 TString fileName(entry->GetName());
991                 if (!fileName.BeginsWith(begin))
992                         continue;
993                         
994                 nDirs++;
995                         
996                 if (first)
997                 {
998                         first = kFALSE;
999                         // check that folder exists, otherwise create it
1000                         TGridResult* result = gGrid->Ls(alienDir.Data(), "a");
1001                         
1002                         if (!result)
1003                         {
1004                                 delete dirList;
1005                                 return kFALSE;
1006                         }
1007                         
1008                         if (!result->GetFileName(1)) // TODO: It looks like element 0 is always 0!!
1009                         {
1010                                 // TODO It does not work currently! Bug in TAliEn::Mkdir
1011                                 // TODO Manually fixed in local root v5-16-00
1012                                 if (!gGrid->Mkdir(alienDir.Data(),"-p",0))
1013                                 {
1014                                         Log("SHUTTLE", Form("CopyFilesToGrid - Cannot create directory %s",
1015                                                         alienDir.Data()));
1016                                         delete dirList;
1017                                         return kFALSE;
1018                                 } else {
1019                                         Log("SHUTTLE",Form("CopyFilesToGrid - Folder %s created", alienDir.Data()));
1020                                 }
1021                                 
1022                         } else {
1023                                         Log("SHUTTLE",Form("CopyFilesToGrid - Folder %s found", alienDir.Data()));
1024                         }
1025                 }
1026                         
1027                 TString fullLocalPath;
1028                 fullLocalPath.Form("%s/%s", dir.Data(), fileName.Data());
1029                 
1030                 TString fullGridPath;
1031                 fullGridPath.Form("alien://%s/%s", alienDir.Data(), fileName.Data());
1032
1033                 Bool_t result = TFile::Cp(fullLocalPath, fullGridPath);
1034                 
1035                 if (result)
1036                 {
1037                         Log("SHUTTLE", Form("CopyFilesToGrid - Copying local file %s to %s succeeded!", 
1038                                                 fullLocalPath.Data(), fullGridPath.Data()));
1039                         RemoveFile(fullLocalPath);
1040                         nTransfer++;
1041                 }
1042                 else
1043                 {
1044                         Log("SHUTTLE", Form("CopyFilesToGrid - Copying local file %s to %s FAILED!", 
1045                                                 fullLocalPath.Data(), fullGridPath.Data()));
1046                         success = kFALSE;
1047                 }
1048         }
1049
1050         Log("SHUTTLE", Form("CopyFilesToGrid - %d (over %d) files in folder %s copied to Grid.", 
1051                                                 nTransfer, nDirs, dir.Data()));
1052
1053                 
1054         delete dirList;
1055         return success;
1056 }
1057
1058 //______________________________________________________________________________________________
1059 const char* AliShuttle::GetRefFilePrefix(const char* base, const char* detector)
1060 {
1061         //
1062         // Get folder name of reference files 
1063         //
1064
1065         TString offDetStr(GetOfflineDetName(detector));
1066         TString dir;
1067         if (offDetStr == "ITS" || offDetStr == "MUON" || offDetStr == "PHOS")
1068         {
1069                 dir.Form("%s/%s/%s", base, offDetStr.Data(), detector);
1070         } else {
1071                 dir.Form("%s/%s", base, offDetStr.Data());
1072         }
1073         
1074         return dir.Data();
1075         
1076
1077 }
1078
1079 //______________________________________________________________________________________________
1080 void AliShuttle::CleanLocalStorage(const TString& uri)
1081 {
1082         //
1083         // Called in case the preprocessor is declared failed. Remove remaining objects from the local storages.
1084         //
1085
1086         const char* type = 0;
1087         if(uri == fgkLocalCDB) {
1088                 type = "OCDB";
1089         } else if(uri == fgkLocalRefStorage) {
1090                 type = "Reference";
1091         } else {
1092                 AliError(Form("Invalid storage URI: %s", uri.Data()));
1093                 return;
1094         }
1095
1096         AliCDBManager* man = AliCDBManager::Instance();
1097
1098         // open local storage
1099         AliCDBStorage *localSto = man->GetStorage(uri);
1100         if(!localSto) {
1101                 Log("SHUTTLE",
1102                         Form("CleanLocalStorage - cannot activate local %s storage", type));
1103                 return;
1104         }
1105
1106         TString filename(Form("%s/%s/*/Run*_v%d_s*.root",
1107                 localSto->GetBaseFolder().Data(), GetOfflineDetName(fCurrentDetector.Data()), GetCurrentRun()));
1108
1109         AliDebug(2, Form("filename = %s", filename.Data()));
1110
1111         Log("SHUTTLE", Form("Removing remaining local files for run %d and detector %s ...",
1112                 GetCurrentRun(), fCurrentDetector.Data()));
1113
1114         RemoveFile(filename.Data());
1115
1116 }
1117
1118 //______________________________________________________________________________________________
1119 void AliShuttle::RemoveFile(const char* filename)
1120 {
1121         //
1122         // removes local file
1123         //
1124
1125         TString command(Form("rm -f %s", filename));
1126
1127         Int_t result = gSystem->Exec(command.Data());
1128         if(result != 0)
1129         {
1130                 Log("SHUTTLE", Form("RemoveFile - %s: Cannot remove file %s!",
1131                         fCurrentDetector.Data(), filename));
1132         }
1133 }
1134
1135 //______________________________________________________________________________________________
1136 AliShuttleStatus* AliShuttle::ReadShuttleStatus()
1137 {
1138         //
1139         // Reads the AliShuttleStatus from the CDB
1140         //
1141
1142         if (fStatusEntry){
1143                 delete fStatusEntry;
1144                 fStatusEntry = 0;
1145         }
1146
1147         fStatusEntry = AliCDBManager::Instance()->GetStorage(GetLocalCDB())
1148                 ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
1149
1150         if (!fStatusEntry) return 0;
1151         fStatusEntry->SetOwner(1);
1152
1153         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1154         if (!status) {
1155                 AliError("Invalid object stored to CDB!");
1156                 return 0;
1157         }
1158
1159         return status;
1160 }
1161
1162 //______________________________________________________________________________________________
1163 Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status)
1164 {
1165         //
1166         // writes the status for one subdetector
1167         //
1168
1169         if (fStatusEntry){
1170                 delete fStatusEntry;
1171                 fStatusEntry = 0;
1172         }
1173
1174         Int_t run = GetCurrentRun();
1175
1176         AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
1177
1178         fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
1179         fStatusEntry->SetOwner(1);
1180
1181         UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
1182
1183         if (!result) {
1184                 Log("SHUTTLE", Form("WriteShuttleStatus - Failed for %s, run %d",
1185                                                 fCurrentDetector.Data(), run));
1186                 return kFALSE;
1187         }
1188         
1189         SendMLInfo();
1190
1191         return kTRUE;
1192 }
1193
1194 //______________________________________________________________________________________________
1195 void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount)
1196 {
1197         //
1198         // changes the AliShuttleStatus for the given detector and run to the given status
1199         //
1200
1201         if (!fStatusEntry){
1202                 AliError("UNEXPECTED: fStatusEntry empty");
1203                 return;
1204         }
1205
1206         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1207
1208         if (!status){
1209                 Log("SHUTTLE", "UpdateShuttleStatus - UNEXPECTED: status could not be read from current CDB entry");
1210                 return;
1211         }
1212
1213         TString actionStr = Form("UpdateShuttleStatus - %s: Changing state from %s to %s",
1214                                 fCurrentDetector.Data(),
1215                                 status->GetStatusName(),
1216                                 status->GetStatusName(newStatus));
1217         Log("SHUTTLE", actionStr);
1218         SetLastAction(actionStr);
1219
1220         status->SetStatus(newStatus);
1221         if (increaseCount) status->IncreaseCount();
1222
1223         AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
1224
1225         SendMLInfo();
1226 }
1227
1228 //______________________________________________________________________________________________
1229 void AliShuttle::SendMLInfo()
1230 {
1231         //
1232         // sends ML information about the current status of the current detector being processed
1233         //
1234         
1235         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1236         
1237         if (!status){
1238                 Log("SHUTTLE", "SendMLInfo - UNEXPECTED: status could not be read from current CDB entry");
1239                 return;
1240         }
1241         
1242         TMonaLisaText  mlStatus(Form("%s_status", fCurrentDetector.Data()), status->GetStatusName());
1243         TMonaLisaValue mlRetryCount(Form("%s_count", fCurrentDetector.Data()), status->GetCount());
1244
1245         TList mlList;
1246         mlList.Add(&mlStatus);
1247         mlList.Add(&mlRetryCount);
1248
1249         fMonaLisa->SendParameters(&mlList);
1250 }
1251
1252 //______________________________________________________________________________________________
1253 Bool_t AliShuttle::ContinueProcessing()
1254 {
1255         // this function reads the AliShuttleStatus information from CDB and
1256         // checks if the processing should be continued
1257         // if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
1258
1259         if (!fConfig->HostProcessDetector(fCurrentDetector)) return kFALSE;
1260
1261         AliPreprocessor* aPreprocessor =
1262                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
1263         if (!aPreprocessor)
1264         {
1265                 Log("SHUTTLE", Form("ContinueProcessing - %s: no preprocessor registered", fCurrentDetector.Data()));
1266                 return kFALSE;
1267         }
1268
1269         AliShuttleLogbookEntry::Status entryStatus =
1270                 fLogbookEntry->GetDetectorStatus(fCurrentDetector);
1271
1272         if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
1273                 Log("SHUTTLE", Form("ContinueProcessing - %s is %s",
1274                                 fCurrentDetector.Data(),
1275                                 fLogbookEntry->GetDetectorStatusName(entryStatus)));
1276                 return kFALSE;
1277         }
1278
1279         // if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state
1280
1281         // check if current run is first unprocessed run for current detector
1282         if (fConfig->StrictRunOrder(fCurrentDetector) &&
1283                 !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
1284         {
1285                 if (fTestMode == kNone)
1286                 {
1287                         Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering"
1288                                         " but this is not the first unprocessed run!"));
1289                         return kFALSE;
1290                 }
1291                 else
1292                 {
1293                         Log("SHUTTLE", Form("ContinueProcessing - In TESTMODE - "
1294                                         "Although %s requires strict run ordering "
1295                                         "and this is not the first unprocessed run, "
1296                                         "the SHUTTLE continues"));
1297                 }
1298         }
1299
1300         AliShuttleStatus* status = ReadShuttleStatus();
1301         if (!status) {
1302                 // first time
1303                 Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
1304                                 fCurrentDetector.Data()));
1305                 status = new AliShuttleStatus(AliShuttleStatus::kStarted);
1306                 return WriteShuttleStatus(status);
1307         }
1308
1309         // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
1310         // If it happens it may mean Logbook updating failed... let's do it now!
1311         if (status->GetStatus() == AliShuttleStatus::kDone ||
1312             status->GetStatus() == AliShuttleStatus::kFailed){
1313                 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
1314                                         fCurrentDetector.Data(),
1315                                         status->GetStatusName(status->GetStatus())));
1316                 UpdateShuttleLogbook(fCurrentDetector.Data(),
1317                                         status->GetStatusName(status->GetStatus()));
1318                 return kFALSE;
1319         }
1320
1321         if (status->GetStatus() == AliShuttleStatus::kStoreError) {
1322                 Log("SHUTTLE",
1323                         Form("ContinueProcessing - %s: Grid storage of one or more "
1324                                 "objects failed. Trying again now",
1325                                 fCurrentDetector.Data()));
1326                 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
1327                 if (StoreOCDB()){
1328                         Log("SHUTTLE", Form("ContinueProcessing - %s: all objects "
1329                                 "successfully stored into main storage",
1330                                 fCurrentDetector.Data()));
1331                         UpdateShuttleStatus(AliShuttleStatus::kDone);
1332                         UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE");
1333                 } else {
1334                         Log("SHUTTLE",
1335                                 Form("ContinueProcessing - %s: Grid storage failed again",
1336                                         fCurrentDetector.Data()));
1337                         UpdateShuttleStatus(AliShuttleStatus::kStoreError);
1338                 }
1339                 return kFALSE;
1340         }
1341
1342         // if we get here, there is a restart
1343         Bool_t cont = kFALSE;
1344
1345         // abort conditions
1346         if (status->GetCount() >= fConfig->GetMaxRetries()) {
1347                 Log("SHUTTLE", Form("ContinueProcessing - %s failed %d times in status %s - "
1348                                 "Updating Shuttle Logbook", fCurrentDetector.Data(),
1349                                 status->GetCount(), status->GetStatusName()));
1350                 UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
1351                 UpdateShuttleStatus(AliShuttleStatus::kFailed);
1352
1353                 // there may still be objects in local OCDB and reference storage
1354                 // and FXS databases may be not updated: do it now!
1355                 
1356                 // TODO Currently disabled, we want to keep files in case of failure!
1357                 // CleanLocalStorage(fgkLocalCDB);
1358                 // CleanLocalStorage(fgkLocalRefStorage);
1359                 // UpdateTableFailCase();
1360                 
1361                 // Send mail to detector expert!
1362                 Log("SHUTTLE", Form("ContinueProcessing - Sending mail to %s expert...", 
1363                                         fCurrentDetector.Data()));
1364                 if (!SendMail())
1365                         Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert",
1366                                         fCurrentDetector.Data()));
1367
1368         } else {
1369                 Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. "
1370                                 "Aborted before with %s. Retry number %d.", fCurrentDetector.Data(),
1371                                 status->GetStatusName(), status->GetCount()));
1372                 Bool_t increaseCount = kTRUE;
1373                 if (status->GetStatus() == AliShuttleStatus::kDCSError || 
1374                         status->GetStatus() == AliShuttleStatus::kDCSStarted)
1375                                 increaseCount = kFALSE;
1376                                 
1377                 UpdateShuttleStatus(AliShuttleStatus::kStarted, increaseCount);
1378                 cont = kTRUE;
1379         }
1380
1381         return cont;
1382 }
1383
1384 //______________________________________________________________________________________________
1385 Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
1386 {
1387         //
1388         // Makes data retrieval for all detectors in the configuration.
1389         // entry: Shuttle logbook entry, contains run paramenters and status of detectors
1390         // (Unprocessed, Inactive, Failed or Done).
1391         // Returns kFALSE in case of error occured and kTRUE otherwise
1392         //
1393
1394         if (!entry) return kFALSE;
1395
1396         fLogbookEntry = entry;
1397
1398         Log("SHUTTLE", Form("\t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^*",
1399                                         GetCurrentRun()));
1400
1401         // create ML instance that monitors this run
1402         fMonaLisa = new TMonaLisaWriter(fConfig->GetMonitorHost(), fConfig->GetMonitorTable(), Form("%d", GetCurrentRun()));
1403
1404         // Send the information to ML
1405         TMonaLisaText  mlStatus("SHUTTLE_status", "Processing");
1406         TMonaLisaText  mlRunType("SHUTTLE_runtype", Form("%s (%s)", entry->GetRunType(), entry->GetRunParameter("log")));
1407
1408         TList mlList;
1409         mlList.Add(&mlStatus);
1410         mlList.Add(&mlRunType);
1411
1412         fMonaLisa->SendParameters(&mlList);
1413
1414         if (fLogbookEntry->IsDone())
1415         {
1416                 Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook");
1417                 UpdateShuttleLogbook("shuttle_done");
1418                 fLogbookEntry = 0;
1419                 return kTRUE;
1420         }
1421
1422         // read test mode if flag is set
1423         if (fReadTestMode)
1424         {
1425                 fTestMode = kNone;
1426                 TString logEntry(entry->GetRunParameter("log"));
1427                 //printf("log entry = %s\n", logEntry.Data());
1428                 TString searchStr("Testmode: ");
1429                 Int_t pos = logEntry.Index(searchStr.Data());
1430                 //printf("%d\n", pos);
1431                 if (pos >= 0)
1432                 {
1433                         TSubString subStr = logEntry(pos + searchStr.Length(), logEntry.Length());
1434                         //printf("%s\n", subStr.String().Data());
1435                         TString newStr(subStr.Data());
1436                         TObjArray* token = newStr.Tokenize(' ');
1437                         if (token)
1438                         {
1439                                 //token->Print();
1440                                 TObjString* tmpStr = dynamic_cast<TObjString*> (token->First());
1441                                 if (tmpStr)
1442                                 {
1443                                         Int_t testMode = tmpStr->String().Atoi();
1444                                         if (testMode > 0)
1445                                         {
1446                                                 Log("SHUTTLE", Form("Process - Enabling test mode %d", testMode));
1447                                                 SetTestMode((TestMode) testMode);
1448                                         }
1449                                 }
1450                                 delete token;          
1451                         }
1452                 }
1453         }
1454                 
1455         fLogbookEntry->Print("all");
1456
1457         // Initialization
1458         Bool_t hasError = kFALSE;
1459
1460         // Set the CDB and Reference folders according to the year and LHC period
1461         TString lhcPeriod(GetLHCPeriod());
1462         if (lhcPeriod.Length() == 0) 
1463         {
1464                 Log("SHUTTLE","Process - LHCPeriod not found in logbook!");
1465                 return 0; 
1466         }       
1467         
1468         if (fgkMainCDB.Length() == 0)
1469                 fgkMainCDB = Form("alien://folder=/alice/data/%d/%s/OCDB?user=alidaq?cacheFold=/tmp/OCDBCache", 
1470                                         GetCurrentYear(), lhcPeriod.Data());
1471         
1472         if (fgkMainRefStorage.Length() == 0)
1473                 fgkMainRefStorage = Form("alien://folder=/alice/data/%d/%s/Reference?user=alidaq?cacheFold=/tmp/OCDBCache", 
1474                                         GetCurrentYear(), lhcPeriod.Data());
1475         
1476         AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
1477         if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
1478         AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
1479         if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
1480
1481         // Loop on detectors in the configuration
1482         TIter iter(fConfig->GetDetectors());
1483         TObjString* aDetector = 0;
1484
1485         while ((aDetector = (TObjString*) iter.Next()))
1486         {
1487                 fCurrentDetector = aDetector->String();
1488
1489                 if (ContinueProcessing() == kFALSE) continue;
1490
1491                 Log("SHUTTLE", Form("\t\t\t****** run %d - %s: START  ******",
1492                                                 GetCurrentRun(), aDetector->GetName()));
1493
1494                 for(Int_t iSys=0;iSys<3;iSys++) fFXSCalled[iSys]=kFALSE;
1495
1496                 Log(fCurrentDetector.Data(), "Process - Starting processing");
1497
1498                 Int_t pid = fork();
1499
1500                 if (pid < 0)
1501                 {
1502                         Log("SHUTTLE", "Process - ERROR: Forking failed");
1503                 }
1504                 else if (pid > 0)
1505                 {
1506                         // parent
1507                         Log("SHUTTLE", Form("Process - In parent process of %d - %s: Starting monitoring",
1508                                                         GetCurrentRun(), aDetector->GetName()));
1509
1510                         Long_t begin = time(0);
1511
1512                         int status; // to be used with waitpid, on purpose an int (not Int_t)!
1513                         while (waitpid(pid, &status, WNOHANG) == 0)
1514                         {
1515                                 Long_t expiredTime = time(0) - begin;
1516
1517                                 if (expiredTime > fConfig->GetPPTimeOut())
1518                                 {
1519                                         TString tmp;
1520                                         tmp.Form("Process - Process of %s time out. "
1521                                                         "Run time: %d seconds. Killing...",
1522                                                         fCurrentDetector.Data(), expiredTime);
1523                                         Log("SHUTTLE", tmp);
1524                                         Log(fCurrentDetector, tmp);
1525
1526                                         kill(pid, 9);
1527
1528                                         UpdateShuttleStatus(AliShuttleStatus::kPPTimeOut);
1529                                         hasError = kTRUE;
1530
1531                                         gSystem->Sleep(1000);
1532                                 }
1533                                 else
1534                                 {
1535                                         gSystem->Sleep(1000);
1536                                         
1537                                         TString checkStr;
1538                                         checkStr.Form("ps -o vsize --pid %d | tail -n 1", pid);
1539                                         FILE* pipe = gSystem->OpenPipe(checkStr, "r");
1540                                         if (!pipe)
1541                                         {
1542                                                 Log("SHUTTLE", Form("Process - Error: "
1543                                                         "Could not open pipe to %s", checkStr.Data()));
1544                                                 continue;
1545                                         }
1546                                                 
1547                                         char buffer[100];
1548                                         if (!fgets(buffer, 100, pipe))
1549                                         {
1550                                                 Log("SHUTTLE", "Process - Error: ps did not return anything");
1551                                                 gSystem->ClosePipe(pipe);
1552                                                 continue;
1553                                         }
1554                                         gSystem->ClosePipe(pipe);
1555                                         
1556                                         //Log("SHUTTLE", Form("ps returned %s", buffer));
1557                                         
1558                                         Int_t mem = 0;
1559                                         if ((sscanf(buffer, "%d\n", &mem) != 1) || !mem)
1560                                         {
1561                                                 Log("SHUTTLE", "Process - Error: Could not parse output of ps");
1562                                                 continue;
1563                                         }
1564                                         
1565                                         if (expiredTime % 60 == 0)
1566                                                 Log("SHUTTLE", Form("Process - %s: Checking process. "
1567                                                         "Run time: %d seconds - Memory consumption: %d KB",
1568                                                         fCurrentDetector.Data(), expiredTime, mem));
1569                                         
1570                                         if (mem > fConfig->GetPPMaxMem())
1571                                         {
1572                                                 TString tmp;
1573                                                 tmp.Form("Process - Process exceeds maximum allowed memory "
1574                                                         "(%d KB > %d KB). Killing...",
1575                                                         mem, fConfig->GetPPMaxMem());
1576                                                 Log("SHUTTLE", tmp);
1577                                                 Log(fCurrentDetector, tmp);
1578         
1579                                                 kill(pid, 9);
1580         
1581                                                 UpdateShuttleStatus(AliShuttleStatus::kPPOutOfMemory);
1582                                                 hasError = kTRUE;
1583         
1584                                                 gSystem->Sleep(1000);
1585                                         }
1586                                 }
1587                         }
1588
1589                         Log("SHUTTLE", Form("Process - In parent process of %d - %s: Client has terminated.",
1590                                                                 GetCurrentRun(), aDetector->GetName()));
1591
1592                         if (WIFEXITED(status))
1593                         {
1594                                 Int_t returnCode = WEXITSTATUS(status);
1595
1596                                 Log("SHUTTLE", Form("Process - %s: the return code is %d", fCurrentDetector.Data(),
1597                                                                                 returnCode));
1598
1599                                 if (returnCode == 0) hasError = kTRUE;
1600                         }
1601                 }
1602                 else if (pid == 0)
1603                 {
1604                         // client
1605                         Log("SHUTTLE", Form("Process - In client process of %d - %s", GetCurrentRun(),
1606                                 aDetector->GetName()));
1607
1608                         Log("SHUTTLE", Form("Process - Redirecting output to %s log",fCurrentDetector.Data()));
1609
1610                         if ((freopen(GetLogFileName(fCurrentDetector), "a", stdout)) == 0)
1611                         {
1612                                 Log("SHUTTLE", "Process - Could not freopen stdout");
1613                         }
1614                         else
1615                         {
1616                                 fOutputRedirected = kTRUE;
1617                                 if ((dup2(fileno(stdout), fileno(stderr))) < 0)
1618                                         Log("SHUTTLE", "Process - Could not redirect stderr");
1619                                 
1620                         }
1621                         
1622                         TString wd = gSystem->WorkingDirectory();
1623                         TString tmpDir = Form("%s/%s_%d_process", GetShuttleTempDir(), 
1624                                 fCurrentDetector.Data(), GetCurrentRun());
1625                         
1626                         Int_t result = gSystem->GetPathInfo(tmpDir.Data(), 0, (Long64_t*) 0, 0, 0);
1627                         if (!result) // temp dir already exists!
1628                         {
1629                                 Log(fCurrentDetector.Data(), 
1630                                         Form("Process - %s dir already exists! Removing...", tmpDir.Data()));
1631                                 gSystem->Exec(Form("rm -rf %s",tmpDir.Data()));         
1632                         } 
1633                         
1634                         if (gSystem->mkdir(tmpDir.Data(), 1))
1635                         {
1636                                 Log(fCurrentDetector.Data(), "Process - could not make temp directory!!");
1637                                 gSystem->Exit(1);
1638                         }
1639                         
1640                         if (!gSystem->ChangeDirectory(tmpDir.Data())) 
1641                         {
1642                                 Log(fCurrentDetector.Data(), "Process - could not change directory!!");
1643                                 gSystem->Exit(1);                       
1644                         }
1645                         
1646                         Bool_t success = ProcessCurrentDetector();
1647                         
1648                         gSystem->ChangeDirectory(wd.Data());
1649                                                 
1650                         if (success) // Preprocessor finished successfully!
1651                         { 
1652                                 // remove temporary folder
1653                                 gSystem->Exec(Form("rm -rf %s",tmpDir.Data()));
1654                                 
1655                                 // Update time_processed field in FXS DB
1656                                 if (UpdateTable() == kFALSE)
1657                                         Log("SHUTTLE", Form("Process - %s: Could not update FXS databases!", 
1658                                                         fCurrentDetector.Data()));
1659
1660                                 // Transfer the data from local storage to main storage (Grid)
1661                                 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
1662                                 if (StoreOCDB() == kFALSE)
1663                                 {
1664                                         Log("SHUTTLE", 
1665                                                 Form("\t\t\t****** run %d - %s: STORAGE ERROR ******",
1666                                                         GetCurrentRun(), aDetector->GetName()));
1667                                         UpdateShuttleStatus(AliShuttleStatus::kStoreError);
1668                                         success = kFALSE;
1669                                 } else {
1670                                         Log("SHUTTLE", 
1671                                                 Form("\t\t\t****** run %d - %s: DONE ******",
1672                                                         GetCurrentRun(), aDetector->GetName()));
1673                                         UpdateShuttleStatus(AliShuttleStatus::kDone);
1674                                         UpdateShuttleLogbook(fCurrentDetector, "DONE");
1675                                 }
1676                         } else 
1677                         {
1678                                 Log("SHUTTLE", 
1679                                         Form("\t\t\t****** run %d - %s: PP ERROR ******",
1680                                                 GetCurrentRun(), aDetector->GetName()));
1681                         }
1682
1683                         for (UInt_t iSys=0; iSys<3; iSys++)
1684                         {
1685                                 if (fFXSCalled[iSys]) fFXSlist[iSys].Clear();
1686                         }
1687
1688                         Log("SHUTTLE", Form("Process - Client process of %d - %s is exiting now with %d.",
1689                                                         GetCurrentRun(), aDetector->GetName(), success));
1690
1691                         // the client exits here
1692                         gSystem->Exit(success);
1693
1694                         AliError("We should never get here!!!");
1695                 }
1696         }
1697
1698         Log("SHUTTLE", Form("\t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^*",
1699                                                         GetCurrentRun()));
1700
1701         //check if shuttle is done for this run, if so update logbook
1702         TObjArray checkEntryArray;
1703         checkEntryArray.SetOwner(1);
1704         TString whereClause = Form("where run=%d", GetCurrentRun());
1705         if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || checkEntryArray.GetEntries() == 0) {
1706                 Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!",
1707                                                 GetCurrentRun()));
1708                 return hasError == kFALSE;
1709         }
1710
1711         AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*>
1712                                                 (checkEntryArray.At(0));
1713
1714         if (checkEntry)
1715         {
1716                 if (checkEntry->IsDone())
1717                 {
1718                         Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook");
1719                         UpdateShuttleLogbook("shuttle_done");
1720                 }
1721                 else
1722                 {
1723                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1724                         {
1725                                 if (checkEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
1726                                 {
1727                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
1728                                                         checkEntry->GetRun(), GetDetName(iDet)));
1729                                         fFirstUnprocessed[iDet] = kFALSE;
1730                                 }
1731                         }
1732                 }
1733         }
1734
1735         // remove ML instance
1736         delete fMonaLisa;
1737         fMonaLisa = 0;
1738
1739         fLogbookEntry = 0;
1740
1741         return hasError == kFALSE;
1742 }
1743
1744 //______________________________________________________________________________________________
1745 Bool_t AliShuttle::ProcessCurrentDetector()
1746 {
1747         //
1748         // Makes data retrieval just for a specific detector (fCurrentDetector).
1749         // Threre should be a configuration for this detector.
1750
1751         Log("SHUTTLE", Form("ProcessCurrentDetector - Retrieving values for %s, run %d", 
1752                                                 fCurrentDetector.Data(), GetCurrentRun()));
1753
1754         TString wd = gSystem->WorkingDirectory();
1755         
1756         if (!CleanReferenceStorage(fCurrentDetector.Data()))
1757                 return kFALSE;
1758         
1759         gSystem->ChangeDirectory(wd.Data());
1760         
1761         TMap* dcsMap = new TMap();
1762
1763         // call preprocessor
1764         AliPreprocessor* aPreprocessor =
1765                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
1766
1767         aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
1768
1769         Bool_t processDCS = aPreprocessor->ProcessDCS();
1770
1771         if (!processDCS)
1772         {
1773                 Log(fCurrentDetector, "ProcessCurrentDetector -"
1774                         " The preprocessor requested to skip the retrieval of DCS values");
1775         }
1776         else if (fTestMode & kSkipDCS)
1777         {
1778                 Log(fCurrentDetector, "ProcessCurrentDetector - In TESTMODE: Skipping DCS processing");
1779         } 
1780         else if (fTestMode & kErrorDCS)
1781         {
1782                 Log(fCurrentDetector, "ProcessCurrentDetector - In TESTMODE: Simulating DCS error");
1783                 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
1784                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1785                 delete dcsMap;
1786                 return kFALSE;
1787         } else {
1788
1789                 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
1790
1791                 // Query DCS archive
1792                 Int_t nServers = fConfig->GetNServers(fCurrentDetector);
1793                 
1794                 for (int iServ=0; iServ<nServers; iServ++)
1795                 {
1796                 
1797                         TString host(fConfig->GetDCSHost(fCurrentDetector, iServ));
1798                         Int_t port = fConfig->GetDCSPort(fCurrentDetector, iServ);
1799                         Int_t multiSplit = fConfig->GetMultiSplit(fCurrentDetector, iServ);
1800
1801                         Log(fCurrentDetector, Form("ProcessCurrentDetector -"
1802                                         " Querying DCS Amanda server %s:%d (%d of %d)", 
1803                                         host.Data(), port, iServ+1, nServers));
1804                         
1805                         TMap* aliasMap = 0;
1806                         TMap* dpMap = 0;
1807         
1808                         if (fConfig->GetDCSAliases(fCurrentDetector, iServ)->GetEntries() > 0)
1809                         {
1810                                 aliasMap = GetValueSet(host, port, 
1811                                                 fConfig->GetDCSAliases(fCurrentDetector, iServ), 
1812                                                 kAlias, multiSplit);
1813                                 if (!aliasMap)
1814                                 {
1815                                         Log(fCurrentDetector, 
1816                                                 Form("ProcessCurrentDetector -"
1817                                                         " Error retrieving DCS aliases from server %s."
1818                                                         " Sending mail to DCS experts!", host.Data()));
1819                                         UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1820                                         
1821                                         if (!SendMailToDCS())
1822                                                 Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!"));
1823
1824                                         delete dcsMap;
1825                                         return kFALSE;
1826                                 }
1827                         }
1828                         
1829                         if (fConfig->GetDCSDataPoints(fCurrentDetector, iServ)->GetEntries() > 0)
1830                         {
1831                                 dpMap = GetValueSet(host, port, 
1832                                                 fConfig->GetDCSDataPoints(fCurrentDetector, iServ), 
1833                                                 kDP, multiSplit);
1834                                 if (!dpMap)
1835                                 {
1836                                         Log(fCurrentDetector, 
1837                                                 Form("ProcessCurrentDetector -"
1838                                                         " Error retrieving DCS data points from server %s."
1839                                                         " Sending mail to DCS experts!", host.Data()));
1840                                         UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1841                                         
1842                                         if (!SendMailToDCS())
1843                                                 Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!"));
1844                                         
1845                                         if (aliasMap) delete aliasMap;
1846                                         delete dcsMap;
1847                                         return kFALSE;
1848                                 }                               
1849                         }
1850                         
1851                         // merge aliasMap and dpMap into dcsMap
1852                         if(aliasMap) {
1853                                 TIter iter(aliasMap);
1854                                 TObjString* key = 0;
1855                                 while ((key = (TObjString*) iter.Next()))
1856                                         dcsMap->Add(key, aliasMap->GetValue(key->String()));
1857                                 
1858                                 aliasMap->SetOwner(kFALSE);
1859                                 delete aliasMap;
1860                         }       
1861                         
1862                         if(dpMap) {
1863                                 TIter iter(dpMap);
1864                                 TObjString* key = 0;
1865                                 while ((key = (TObjString*) iter.Next()))
1866                                         dcsMap->Add(key, dpMap->GetValue(key->String()));
1867                                 
1868                                 dpMap->SetOwner(kFALSE);
1869                                 delete dpMap;
1870                         }
1871                 }
1872         }
1873         
1874         // save map into file, to help debugging in case of preprocessor error
1875         TFile* f = TFile::Open("DCSMap.root","recreate");
1876         f->cd();
1877         dcsMap->Write("DCSMap", TObject::kSingleKey);
1878         f->Close();
1879         delete f;
1880         
1881         // DCS Archive DB processing successful. Call Preprocessor!
1882         UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
1883
1884         UInt_t returnValue = aPreprocessor->Process(dcsMap);
1885
1886         if (returnValue > 0) // Preprocessor error!
1887         {
1888                 Log(fCurrentDetector, Form("ProcessCurrentDetector - "
1889                                 "Preprocessor failed. Process returned %d.", returnValue));
1890                 UpdateShuttleStatus(AliShuttleStatus::kPPError);
1891                 dcsMap->DeleteAll();
1892                 delete dcsMap;
1893                 return kFALSE;
1894         }
1895         
1896         // preprocessor ok!
1897         UpdateShuttleStatus(AliShuttleStatus::kPPDone);
1898         Log(fCurrentDetector, Form("ProcessCurrentDetector - %s preprocessor returned success",
1899                                 fCurrentDetector.Data()));
1900
1901         dcsMap->DeleteAll();
1902         delete dcsMap;
1903
1904         return kTRUE;
1905 }
1906
1907 //______________________________________________________________________________________________
1908 Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
1909                 TObjArray& entries)
1910 {
1911         // Query DAQ's Shuttle logbook and fills detector status object.
1912         // Call QueryRunParameters to query DAQ logbook for run parameters.
1913         //
1914
1915         entries.SetOwner(1);
1916
1917         // check connection, in case connect
1918         if(!Connect(3)) return kFALSE;
1919
1920         TString sqlQuery;
1921         sqlQuery = Form("select * from %s %s order by run", fConfig->GetShuttlelbTable(), whereClause);
1922
1923         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
1924         if (!aResult) {
1925                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
1926                 return kFALSE;
1927         }
1928
1929         AliDebug(2,Form("Query = %s", sqlQuery.Data()));
1930
1931         if(aResult->GetRowCount() == 0) {
1932                 Log("SHUTTLE", "No entries in Shuttle Logbook match request");
1933                 delete aResult;
1934                 return kTRUE;
1935         }
1936
1937         // TODO Check field count!
1938         const UInt_t nCols = 23;
1939         if (aResult->GetFieldCount() != (Int_t) nCols) {
1940                 Log("SHUTTLE", "Invalid SQL result field number!");
1941                 delete aResult;
1942                 return kFALSE;
1943         }
1944
1945         TSQLRow* aRow;
1946         while ((aRow = aResult->Next())) {
1947                 TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
1948                 Int_t run = runString.Atoi();
1949
1950                 AliShuttleLogbookEntry *entry = QueryRunParameters(run);
1951                 if (!entry)
1952                         continue;
1953
1954                 // loop on detectors
1955                 for(UInt_t ii = 0; ii < nCols; ii++)
1956                         entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii));
1957
1958                 entries.AddLast(entry);
1959                 delete aRow;
1960         }
1961
1962         delete aResult;
1963         return kTRUE;
1964 }
1965
1966 //______________________________________________________________________________________________
1967 AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
1968 {
1969         //
1970         // Retrieve run parameters written in the DAQ logbook and sets them into AliShuttleLogbookEntry object
1971         //
1972
1973         // check connection, in case connect
1974         if (!Connect(3))
1975                 return 0;
1976
1977         TString sqlQuery;
1978         sqlQuery.Form("select * from %s where run=%d", fConfig->GetDAQlbTable(), run);
1979
1980         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
1981         if (!aResult) {
1982                 Log("SHUTTLE", Form("Can't execute query <%s>!", sqlQuery.Data()));
1983                 return 0;
1984         }
1985
1986         if (aResult->GetRowCount() == 0) {
1987                 Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run));
1988                 delete aResult;
1989                 return 0;
1990         }
1991
1992         if (aResult->GetRowCount() > 1) {
1993                 Log("SHUTTLE", Form("QueryRunParameters - UNEXPECTED: "
1994                                 "more than one entry in DAQ Logbook for run %d!", run));
1995                 delete aResult;
1996                 return 0;
1997         }
1998
1999         TSQLRow* aRow = aResult->Next();
2000         if (!aRow)
2001         {
2002                 Log("SHUTTLE", Form("QueryRunParameters - Could not retrieve row for run %d. Skipping", run));
2003                 delete aResult;
2004                 return 0;
2005         }
2006
2007         AliShuttleLogbookEntry* entry = new AliShuttleLogbookEntry(run);
2008
2009         for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
2010                 entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
2011
2012         UInt_t startTime = entry->GetStartTime();
2013         UInt_t endTime = entry->GetEndTime();
2014
2015         if (!startTime || !endTime || startTime > endTime) {
2016                 Log("SHUTTLE",
2017                         Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d",
2018                                 run, startTime, endTime));
2019                 delete entry;
2020                 delete aRow;
2021                 delete aResult;
2022                 return 0;
2023         }
2024
2025         delete aRow;
2026         delete aResult;
2027
2028         return entry;
2029 }
2030
2031 //______________________________________________________________________________________________
2032 TMap* AliShuttle::GetValueSet(const char* host, Int_t port, const TSeqCollection* entries,
2033                               DCSType type, Int_t multiSplit)
2034 {
2035         // Retrieve all "entry" data points from the DCS server
2036         // host, port: TSocket connection parameters
2037         // entries: list of name of the alias or data point
2038         // type: kAlias or kDP
2039         // returns TMap of values, 0 when failure
2040         
2041         AliDCSClient client(host, port, fTimeout, fRetries, multiSplit);
2042
2043         TMap* result = 0;
2044         if (type == kAlias)
2045         {
2046                 result = client.GetAliasValues(entries, GetCurrentStartTime(), 
2047                         GetCurrentEndTime());
2048         } 
2049         else if (type == kDP)
2050         {
2051                 result = client.GetDPValues(entries, GetCurrentStartTime(), 
2052                         GetCurrentEndTime());
2053         }
2054
2055         if (result == 0)
2056         {
2057                 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get entries! Reason: %s",
2058                         client.GetErrorString(client.GetResultErrorCode())));
2059                 if (client.GetResultErrorCode() == AliDCSClient::fgkServerError)        
2060                         Log(fCurrentDetector.Data(), Form("GetValueSet - Server error code: %s",
2061                                 client.GetServerError().Data()));
2062
2063                 return 0;
2064         }
2065                 
2066         return result;
2067 }
2068
2069 //______________________________________________________________________________________________
2070 const char* AliShuttle::GetFile(Int_t system, const char* detector,
2071                 const char* id, const char* source)
2072 {
2073         // Get calibration file from file exchange servers
2074         // First queries the FXS database for the file name, using the run, detector, id and source info
2075         // then calls RetrieveFile(filename) for actual copy to local disk
2076         // run: current run being processed (given by Logbook entry fLogbookEntry)
2077         // detector: the Preprocessor name
2078         // id: provided as a parameter by the Preprocessor
2079         // source: provided by the Preprocessor through GetFileSources function
2080
2081         // check if test mode should simulate a FXS error
2082         if (fTestMode & kErrorFXSFiles)
2083         {
2084                 Log(detector, Form("GetFile - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2085                 return 0;
2086         }
2087         
2088         // check connection, in case connect
2089         if (!Connect(system))
2090         {
2091                 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
2092                 return 0;
2093         }
2094
2095         // Query preparation
2096         TString sourceName(source);
2097         Int_t nFields = 3;
2098         TString sqlQueryStart = Form("select filePath,size,fileChecksum from %s where",
2099                                                                 fConfig->GetFXSdbTable(system));
2100         TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
2101                                                                 GetCurrentRun(), detector, id);
2102
2103         if (system == kDAQ)
2104         {
2105                 whereClause += Form(" and DAQsource=\"%s\"", source);
2106         }
2107         else if (system == kDCS)
2108         {
2109                 sourceName="none";
2110         }
2111         else if (system == kHLT)
2112         {
2113                 whereClause += Form(" and DDLnumbers=\"%s\"", source);
2114                 nFields = 3;
2115         }
2116
2117         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2118
2119         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2120
2121         // Query execution
2122         TSQLResult* aResult = 0;
2123         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2124         if (!aResult) {
2125                 Log(detector, Form("GetFileName - Can't execute SQL query to %s database for: id = %s, source = %s",
2126                                 GetSystemName(system), id, sourceName.Data()));
2127                 return 0;
2128         }
2129
2130         if(aResult->GetRowCount() == 0)
2131         {
2132                 Log(detector,
2133                         Form("GetFileName - No entry in %s FXS db for: id = %s, source = %s",
2134                                 GetSystemName(system), id, sourceName.Data()));
2135                 delete aResult;
2136                 return 0;
2137         }
2138
2139         if (aResult->GetRowCount() > 1) {
2140                 Log(detector,
2141                         Form("GetFileName - More than one entry in %s FXS db for: id = %s, source = %s",
2142                                 GetSystemName(system), id, sourceName.Data()));
2143                 delete aResult;
2144                 return 0;
2145         }
2146
2147         if (aResult->GetFieldCount() != nFields) {
2148                 Log(detector,
2149                         Form("GetFileName - Wrong field count in %s FXS db for: id = %s, source = %s",
2150                                 GetSystemName(system), id, sourceName.Data()));
2151                 delete aResult;
2152                 return 0;
2153         }
2154
2155         TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
2156
2157         if (!aRow){
2158                 Log(detector, Form("GetFileName - Empty set result in %s FXS db from query: id = %s, source = %s",
2159                                 GetSystemName(system), id, sourceName.Data()));
2160                 delete aResult;
2161                 return 0;
2162         }
2163
2164         TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
2165         TString fileSize(aRow->GetField(1), aRow->GetFieldLength(1));
2166         TString fileChecksum(aRow->GetField(2), aRow->GetFieldLength(2));
2167
2168         delete aResult;
2169         delete aRow;
2170
2171         AliDebug(2, Form("filePath = %s; size = %s, fileChecksum = %s",
2172                                 filePath.Data(), fileSize.Data(), fileChecksum.Data()));
2173
2174         // retrieved file is renamed to make it unique
2175         TString localFileName = Form("%s/%s_%d_process/%s_%s_%d_%s_%s.shuttle",
2176                                         GetShuttleTempDir(), detector, GetCurrentRun(),
2177                                         GetSystemName(system), detector, GetCurrentRun(), 
2178                                         id, sourceName.Data());
2179
2180
2181         // file retrieval from FXS
2182         UInt_t nRetries = 0;
2183         UInt_t maxRetries = 3;
2184         Bool_t result = kFALSE;
2185
2186         // copy!! if successful TSystem::Exec returns 0
2187         while(nRetries++ < maxRetries) {
2188                 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
2189                 result = RetrieveFile(system, filePath.Data(), localFileName.Data());
2190                 if(!result)
2191                 {
2192                         Log(detector, Form("GetFileName - Copy of file %s from %s FXS failed",
2193                                         filePath.Data(), GetSystemName(system)));
2194                         continue;
2195                 } 
2196
2197                 if (fileChecksum.Length()>0)
2198                 {
2199                         // compare md5sum of local file with the one stored in the FXS DB
2200                         Int_t md5Comp = gSystem->Exec(Form("md5sum %s |grep %s 2>&1 > /dev/null",
2201                                                 localFileName.Data(), fileChecksum.Data()));
2202
2203                         if (md5Comp != 0)
2204                         {
2205                                 Log(detector, Form("GetFileName - md5sum of file %s does not match with local copy!",
2206                                                         filePath.Data()));
2207                                 result = kFALSE;
2208                                 continue;
2209                         }
2210                 } else {
2211                         Log(fCurrentDetector, Form("GetFile - md5sum of file %s not set in %s database, skipping comparison",
2212                                                         filePath.Data(), GetSystemName(system)));
2213                 }
2214                 if (result) break;
2215         }
2216
2217         if(!result) return 0;
2218
2219         fFXSCalled[system]=kTRUE;
2220         TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, sourceName.Data()));
2221         fFXSlist[system].Add(fileParams);
2222
2223         static TString staticLocalFileName;
2224         staticLocalFileName.Form("%s", localFileName.Data());
2225         
2226         Log(fCurrentDetector, Form("GetFile - Retrieved file with id %s and "
2227                         "source %s from %s to %s", id, source, 
2228                         GetSystemName(system), localFileName.Data()));
2229                         
2230         return staticLocalFileName.Data();
2231 }
2232
2233 //______________________________________________________________________________________________
2234 Bool_t AliShuttle::RetrieveFile(UInt_t system, const char* fxsFileName, const char* localFileName)
2235 {
2236         //
2237         // Copies file from FXS to local Shuttle machine
2238         //
2239
2240         // check temp directory: trying to cd to temp; if it does not exist, create it
2241         AliDebug(2, Form("Copy file %s from %s FXS into %s",
2242                         GetSystemName(system), fxsFileName, localFileName));
2243                         
2244         TString tmpDir(localFileName);
2245         
2246         tmpDir = tmpDir(0,tmpDir.Last('/'));
2247
2248         Int_t noDir = gSystem->GetPathInfo(tmpDir.Data(), 0, (Long64_t*) 0, 0, 0);
2249         if (noDir) // temp dir does not exists!
2250         {
2251                 if (gSystem->mkdir(tmpDir.Data(), 1))
2252                 {
2253                         Log(fCurrentDetector.Data(), "RetrieveFile - could not make temp directory!!");
2254                         return kFALSE;
2255                 }
2256         }
2257
2258         TString baseFXSFolder;
2259         if (system == kDAQ)
2260         {
2261                 baseFXSFolder = "FES/";
2262         }
2263         else if (system == kDCS)
2264         {
2265                 baseFXSFolder = "";
2266         }
2267         else if (system == kHLT)
2268         {
2269                 baseFXSFolder = "/opt/FXS/";
2270         }
2271
2272
2273         TString command = Form("scp -oPort=%d -2 %s@%s:%s%s %s",
2274                 fConfig->GetFXSPort(system),
2275                 fConfig->GetFXSUser(system),
2276                 fConfig->GetFXSHost(system),
2277                 baseFXSFolder.Data(),
2278                 fxsFileName,
2279                 localFileName);
2280
2281         AliDebug(2, Form("%s",command.Data()));
2282
2283         Bool_t result = (gSystem->Exec(command.Data()) == 0);
2284
2285         return result;
2286 }
2287
2288 //______________________________________________________________________________________________
2289 TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
2290 {
2291         //
2292         // Get sources producing the condition file Id from file exchange servers
2293         // if id is NULL all sources are returned (distinct)
2294         //
2295
2296         Log(detector, Form("GetFileSources - Retrieving sources with id %s from %s", id, GetSystemName(system)));
2297         
2298         // check if test mode should simulate a FXS error
2299         if (fTestMode & kErrorFXSSources)
2300         {
2301                 Log(detector, Form("GetFileSources - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2302                 return 0;
2303         }
2304
2305         if (system == kDCS)
2306         {
2307                 Log(detector, "GetFileSources - WARNING: DCS system has only one source of data!");
2308                 TList *list = new TList();
2309                 list->SetOwner(1);
2310                 list->Add(new TObjString(" "));
2311                 return list;
2312         }
2313
2314         // check connection, in case connect
2315         if (!Connect(system))
2316         {
2317                 Log(detector, Form("GetFileSources - Couldn't connect to %s FXS database", GetSystemName(system)));
2318                 return NULL;
2319         }
2320
2321         TString sourceName = 0;
2322         if (system == kDAQ)
2323         {
2324                 sourceName = "DAQsource";
2325         } else if (system == kHLT)
2326         {
2327                 sourceName = "DDLnumbers";
2328         }
2329
2330         TString sqlQueryStart = Form("select distinct %s from %s where", sourceName.Data(), fConfig->GetFXSdbTable(system));
2331         TString whereClause = Form("run=%d and detector=\"%s\"",
2332                                 GetCurrentRun(), detector);
2333         if (id)
2334                 whereClause += Form(" and fileId=\"%s\"", id);
2335         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2336
2337         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2338
2339         // Query execution
2340         TSQLResult* aResult;
2341         aResult = fServer[system]->Query(sqlQuery);
2342         if (!aResult) {
2343                 Log(detector, Form("GetFileSources - Can't execute SQL query to %s database for id: %s",
2344                                 GetSystemName(system), id));
2345                 return 0;
2346         }
2347
2348         TList *list = new TList();
2349         list->SetOwner(1);
2350         
2351         if (aResult->GetRowCount() == 0)
2352         {
2353                 Log(detector,
2354                         Form("GetFileSources - No entry in %s FXS table for id: %s", GetSystemName(system), id));
2355                 delete aResult;
2356                 return list;
2357         }
2358
2359         Log(detector, Form("GetFileSources - Found %d sources", aResult->GetRowCount()));
2360
2361         TSQLRow* aRow;
2362         while ((aRow = aResult->Next()))
2363         {
2364
2365                 TString source(aRow->GetField(0), aRow->GetFieldLength(0));
2366                 AliDebug(2, Form("%s = %s", sourceName.Data(), source.Data()));
2367                 list->Add(new TObjString(source));
2368                 delete aRow;
2369         }
2370
2371         delete aResult;
2372
2373         return list;
2374 }
2375
2376 //______________________________________________________________________________________________
2377 TList* AliShuttle::GetFileIDs(Int_t system, const char* detector, const char* source)
2378 {
2379         //
2380         // Get all ids of condition files produced by a given source from file exchange servers
2381         //
2382         
2383         Log(detector, Form("GetFileIDs - Retrieving ids with source %s with %s", source, GetSystemName(system)));
2384
2385         // check if test mode should simulate a FXS error
2386         if (fTestMode & kErrorFXSSources)
2387         {
2388                 Log(detector, Form("GetFileIDs - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2389                 return 0;
2390         }
2391
2392         // check connection, in case connect
2393         if (!Connect(system))
2394         {
2395                 Log(detector, Form("GetFileIDs - Couldn't connect to %s FXS database", GetSystemName(system)));
2396                 return NULL;
2397         }
2398
2399         TString sourceName = 0;
2400         if (system == kDAQ)
2401         {
2402                 sourceName = "DAQsource";
2403         } else if (system == kHLT)
2404         {
2405                 sourceName = "DDLnumbers";
2406         }
2407
2408         TString sqlQueryStart = Form("select fileId from %s where", fConfig->GetFXSdbTable(system));
2409         TString whereClause = Form("run=%d and detector=\"%s\"",
2410                                 GetCurrentRun(), detector);
2411         if (sourceName.Length() > 0 && source)
2412                 whereClause += Form(" and %s=\"%s\"", sourceName.Data(), source);
2413         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2414
2415         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2416
2417         // Query execution
2418         TSQLResult* aResult;
2419         aResult = fServer[system]->Query(sqlQuery);
2420         if (!aResult) {
2421                 Log(detector, Form("GetFileIDs - Can't execute SQL query to %s database for source: %s",
2422                                 GetSystemName(system), source));
2423                 return 0;
2424         }
2425
2426         TList *list = new TList();
2427         list->SetOwner(1);
2428         
2429         if (aResult->GetRowCount() == 0)
2430         {
2431                 Log(detector,
2432                         Form("GetFileIDs - No entry in %s FXS table for source: %s", GetSystemName(system), source));
2433                 delete aResult;
2434                 return list;
2435         }
2436
2437         Log(detector, Form("GetFileIDs - Found %d ids", aResult->GetRowCount()));
2438
2439         TSQLRow* aRow;
2440
2441         while ((aRow = aResult->Next()))
2442         {
2443
2444                 TString id(aRow->GetField(0), aRow->GetFieldLength(0));
2445                 AliDebug(2, Form("fileId = %s", id.Data()));
2446                 list->Add(new TObjString(id));
2447                 delete aRow;
2448         }
2449
2450         delete aResult;
2451
2452         return list;
2453 }
2454
2455 //______________________________________________________________________________________________
2456 Bool_t AliShuttle::Connect(Int_t system)
2457 {
2458         // Connect to MySQL Server of the system's FXS MySQL databases
2459         // DAQ Logbook, Shuttle Logbook and DAQ FXS db are on the same host
2460         //
2461
2462         // check connection: if already connected return
2463         if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
2464
2465         TString dbHost, dbUser, dbPass, dbName;
2466
2467         if (system < 3) // FXS db servers
2468         {
2469                 dbHost = Form("mysql://%s:%d", fConfig->GetFXSdbHost(system), fConfig->GetFXSdbPort(system));
2470                 dbUser = fConfig->GetFXSdbUser(system);
2471                 dbPass = fConfig->GetFXSdbPass(system);
2472                 dbName =   fConfig->GetFXSdbName(system);
2473         } else { // Run & Shuttle logbook servers
2474         // TODO Will the Shuttle logbook server be the same as the Run logbook server ???
2475                 dbHost = Form("mysql://%s:%d", fConfig->GetDAQlbHost(), fConfig->GetDAQlbPort());
2476                 dbUser = fConfig->GetDAQlbUser();
2477                 dbPass = fConfig->GetDAQlbPass();
2478                 dbName =   fConfig->GetDAQlbDB();
2479         }
2480
2481         fServer[system] = TSQLServer::Connect(dbHost.Data(), dbUser.Data(), dbPass.Data());
2482         if (!fServer[system] || !fServer[system]->IsConnected()) {
2483                 if(system < 3)
2484                 {
2485                 AliError(Form("Can't establish connection to FXS database for %s",
2486                                         AliShuttleInterface::GetSystemName(system)));
2487                 } else {
2488                 AliError("Can't establish connection to Run logbook.");
2489                 }
2490                 if(fServer[system]) delete fServer[system];
2491                 return kFALSE;
2492         }
2493
2494         // Get tables
2495         TSQLResult* aResult=0;
2496         switch(system){
2497                 case kDAQ:
2498                         aResult = fServer[kDAQ]->GetTables(dbName.Data());
2499                         break;
2500                 case kDCS:
2501                         aResult = fServer[kDCS]->GetTables(dbName.Data());
2502                         break;
2503                 case kHLT:
2504                         aResult = fServer[kHLT]->GetTables(dbName.Data());
2505                         break;
2506                 default:
2507                         aResult = fServer[3]->GetTables(dbName.Data());
2508                         break;
2509         }
2510
2511         delete aResult;
2512         return kTRUE;
2513 }
2514
2515 //______________________________________________________________________________________________
2516 Bool_t AliShuttle::UpdateTable()
2517 {
2518         //
2519         // Update FXS table filling time_processed field in all rows corresponding to current run and detector
2520         //
2521
2522         Bool_t result = kTRUE;
2523
2524         for (UInt_t system=0; system<3; system++)
2525         {
2526                 if(!fFXSCalled[system]) continue;
2527
2528                 // check connection, in case connect
2529                 if (!Connect(system))
2530                 {
2531                         Log(fCurrentDetector, Form("UpdateTable - Couldn't connect to %s FXS database", GetSystemName(system)));
2532                         result = kFALSE;
2533                         continue;
2534                 }
2535
2536                 TTimeStamp now; // now
2537
2538                 // Loop on FXS list entries
2539                 TIter iter(&fFXSlist[system]);
2540                 TObjString *aFXSentry=0;
2541                 while ((aFXSentry = dynamic_cast<TObjString*> (iter.Next())))
2542                 {
2543                         TString aFXSentrystr = aFXSentry->String();
2544                         TObjArray *aFXSarray = aFXSentrystr.Tokenize("#!?!#");
2545                         if (!aFXSarray || aFXSarray->GetEntries() != 2 )
2546                         {
2547                                 Log(fCurrentDetector, Form("UpdateTable - error updating %s FXS entry. Check string: <%s>",
2548                                         GetSystemName(system), aFXSentrystr.Data()));
2549                                 if(aFXSarray) delete aFXSarray;
2550                                 result = kFALSE;
2551                                 continue;
2552                         }
2553                         const char* fileId = ((TObjString*) aFXSarray->At(0))->GetName();
2554                         const char* source = ((TObjString*) aFXSarray->At(1))->GetName();
2555
2556                         TString whereClause;
2557                         if (system == kDAQ)
2558                         {
2559                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
2560                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
2561                         }
2562                         else if (system == kDCS)
2563                         {
2564                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\";",
2565                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId);
2566                         }
2567                         else if (system == kHLT)
2568                         {
2569                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\";",
2570                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
2571                         }
2572
2573                         delete aFXSarray;
2574
2575                         TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
2576                                                                 now.GetSec(), whereClause.Data());
2577
2578                         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2579
2580                         // Query execution
2581                         TSQLResult* aResult;
2582                         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2583                         if (!aResult)
2584                         {
2585                                 Log(fCurrentDetector, Form("UpdateTable - %s db: can't execute SQL query <%s>",
2586                                                                 GetSystemName(system), sqlQuery.Data()));
2587                                 result = kFALSE;
2588                                 continue;
2589                         }
2590                         delete aResult;
2591                 }
2592         }
2593
2594         return result;
2595 }
2596
2597 //______________________________________________________________________________________________
2598 Bool_t AliShuttle::UpdateTableFailCase()
2599 {
2600         // Update FXS table filling time_processed field in all rows corresponding to current run and detector
2601         // this is called in case the preprocessor is declared failed for the current run, because
2602         // the fields are updated only in case of success
2603
2604         Bool_t result = kTRUE;
2605
2606         for (UInt_t system=0; system<3; system++)
2607         {
2608                 // check connection, in case connect
2609                 if (!Connect(system))
2610                 {
2611                         Log(fCurrentDetector, Form("UpdateTableFailCase - Couldn't connect to %s FXS database",
2612                                                         GetSystemName(system)));
2613                         result = kFALSE;
2614                         continue;
2615                 }
2616
2617                 TTimeStamp now; // now
2618
2619                 // Loop on FXS list entries
2620
2621                 TString whereClause = Form("where run=%d and detector=\"%s\";",
2622                                                 GetCurrentRun(), fCurrentDetector.Data());
2623
2624
2625                 TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
2626                                                         now.GetSec(), whereClause.Data());
2627
2628                 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2629
2630                 // Query execution
2631                 TSQLResult* aResult;
2632                 aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2633                 if (!aResult)
2634                 {
2635                         Log(fCurrentDetector, Form("UpdateTableFailCase - %s db: can't execute SQL query <%s>",
2636                                                         GetSystemName(system), sqlQuery.Data()));
2637                         result = kFALSE;
2638                         continue;
2639                 }
2640                 delete aResult;
2641         }
2642
2643         return result;
2644 }
2645
2646 //______________________________________________________________________________________________
2647 Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
2648 {
2649         //
2650         // Update Shuttle logbook filling detector or shuttle_done column
2651         // ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
2652         //
2653
2654         // check connection, in case connect
2655         if(!Connect(3)){
2656                 Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook.");
2657                 return kFALSE;
2658         }
2659
2660         TString detName(detector);
2661         TString setClause;
2662         if(detName == "shuttle_done")
2663         {
2664                 setClause = "set shuttle_done=1";
2665
2666                 // Send the information to ML
2667                 TMonaLisaText  mlStatus("SHUTTLE_status", "Done");
2668
2669                 TList mlList;
2670                 mlList.Add(&mlStatus);
2671
2672                 fMonaLisa->SendParameters(&mlList);
2673         } else {
2674                 TString statusStr(status);
2675                 if(statusStr.Contains("done", TString::kIgnoreCase) ||
2676                    statusStr.Contains("failed", TString::kIgnoreCase)){
2677                         setClause = Form("set %s=\"%s\"", detector, status);
2678                 } else {
2679                         Log("SHUTTLE",
2680                                 Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s",
2681                                         status, detector));
2682                         return kFALSE;
2683                 }
2684         }
2685
2686         TString whereClause = Form("where run=%d", GetCurrentRun());
2687
2688         TString sqlQuery = Form("update %s %s %s",
2689                                         fConfig->GetShuttlelbTable(), setClause.Data(), whereClause.Data());
2690
2691         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2692
2693         // Query execution
2694         TSQLResult* aResult;
2695         aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
2696         if (!aResult) {
2697                 Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data()));
2698                 return kFALSE;
2699         }
2700         delete aResult;
2701
2702         return kTRUE;
2703 }
2704
2705 //______________________________________________________________________________________________
2706 Int_t AliShuttle::GetCurrentRun() const
2707 {
2708         //
2709         // Get current run from logbook entry
2710         //
2711
2712         return fLogbookEntry ? fLogbookEntry->GetRun() : -1;
2713 }
2714
2715 //______________________________________________________________________________________________
2716 UInt_t AliShuttle::GetCurrentStartTime() const
2717 {
2718         //
2719         // get current start time
2720         //
2721
2722         return fLogbookEntry ? fLogbookEntry->GetStartTime() : 0;
2723 }
2724
2725 //______________________________________________________________________________________________
2726 UInt_t AliShuttle::GetCurrentEndTime() const
2727 {
2728         //
2729         // get current end time from logbook entry
2730         //
2731
2732         return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
2733 }
2734
2735 //______________________________________________________________________________________________
2736 UInt_t AliShuttle::GetCurrentYear() const
2737 {
2738         //
2739         // Get current year from logbook entry
2740         //
2741
2742         if (!fLogbookEntry) return 0;
2743         
2744         TTimeStamp startTime(GetCurrentStartTime());
2745         TString year =  Form("%d",startTime.GetDate());
2746         year = year(0,4);
2747         
2748         return year.Atoi();
2749 }
2750
2751 //______________________________________________________________________________________________
2752 const char* AliShuttle::GetLHCPeriod() const
2753 {
2754         //
2755         // Get current LHC period from logbook entry
2756         //
2757
2758         if (!fLogbookEntry) return 0;
2759                 
2760         return fLogbookEntry->GetRunParameter("LHCperiod");
2761 }
2762
2763 //______________________________________________________________________________________________
2764 void AliShuttle::Log(const char* detector, const char* message)
2765 {
2766         //
2767         // Fill log string with a message
2768         //
2769
2770         TString logRunDir = GetShuttleLogDir();
2771         if (GetCurrentRun() >=0)
2772                 logRunDir += Form("/%d", GetCurrentRun());
2773         
2774         void* dir = gSystem->OpenDirectory(logRunDir.Data());
2775         if (dir == NULL) {
2776                 if (gSystem->mkdir(logRunDir.Data(), kTRUE)) {
2777                         AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
2778                         return;
2779                 }
2780
2781         } else {
2782                 gSystem->FreeDirectory(dir);
2783         }
2784
2785         TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
2786         if (GetCurrentRun() >= 0) 
2787                 toLog += Form("run %d - ", GetCurrentRun());
2788         toLog += Form("%s", message);
2789
2790         AliInfo(toLog.Data());
2791         
2792         // if we redirect the log output already to the file, leave here
2793         if (fOutputRedirected && strcmp(detector, "SHUTTLE") != 0)
2794                 return;
2795
2796         TString fileName = GetLogFileName(detector);
2797         
2798         gSystem->ExpandPathName(fileName);
2799
2800         ofstream logFile;
2801         logFile.open(fileName, ofstream::out | ofstream::app);
2802
2803         if (!logFile.is_open()) {
2804                 AliError(Form("Could not open file %s", fileName.Data()));
2805                 return;
2806         }
2807
2808         logFile << toLog.Data() << "\n";
2809
2810         logFile.close();
2811 }
2812
2813 //______________________________________________________________________________________________
2814 TString AliShuttle::GetLogFileName(const char* detector) const
2815 {
2816         // 
2817         // returns the name of the log file for a given sub detector
2818         //
2819         
2820         TString fileName;
2821         
2822         if (GetCurrentRun() >= 0) 
2823         {
2824                 fileName.Form("%s/%d/%s_%d.log", GetShuttleLogDir(), GetCurrentRun(), 
2825                         detector, GetCurrentRun());
2826         } else {
2827                 fileName.Form("%s/%s.log", GetShuttleLogDir(), detector);
2828         }
2829
2830         return fileName;
2831 }
2832
2833 //______________________________________________________________________________________________
2834 Bool_t AliShuttle::Collect(Int_t run)
2835 {
2836         //
2837         // Collects conditions data for all UNPROCESSED run written to DAQ LogBook in case of run = -1 (default)
2838         // If a dedicated run is given this run is processed
2839         //
2840         // In operational mode, this is the Shuttle function triggered by the EOR signal.
2841         //
2842
2843         if (run == -1)
2844                 Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs");
2845         else
2846                 Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run));
2847
2848         SetLastAction("Starting");
2849
2850         TString whereClause("where shuttle_done=0");
2851         if (run != -1)
2852                 whereClause += Form(" and run=%d", run);
2853
2854         TObjArray shuttleLogbookEntries;
2855         if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries))
2856         {
2857                 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
2858                 return kFALSE;
2859         }
2860
2861         if (shuttleLogbookEntries.GetEntries() == 0)
2862         {
2863                 if (run == -1)
2864                         Log("SHUTTLE","Collect - Found no UNPROCESSED runs in Shuttle logbook");
2865                 else
2866                         Log("SHUTTLE", Form("Collect - Run %d is already DONE "
2867                                                 "or it does not exist in Shuttle logbook", run));
2868                 return kTRUE;
2869         }
2870
2871         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
2872                 fFirstUnprocessed[iDet] = kTRUE;
2873
2874         if (run != -1)
2875         {
2876                 // query Shuttle logbook for earlier runs, check if some detectors are unprocessed,
2877                 // flag them into fFirstUnprocessed array
2878                 TString whereClause(Form("where shuttle_done=0 and run < %d", run));
2879                 TObjArray tmpLogbookEntries;
2880                 if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries))
2881                 {
2882                         Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
2883                         return kFALSE;
2884                 }
2885
2886                 TIter iter(&tmpLogbookEntries);
2887                 AliShuttleLogbookEntry* anEntry = 0;
2888                 while ((anEntry = dynamic_cast<AliShuttleLogbookEntry*> (iter.Next())))
2889                 {
2890                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
2891                         {
2892                                 if (anEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
2893                                 {
2894                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
2895                                                         anEntry->GetRun(), GetDetName(iDet)));
2896                                         fFirstUnprocessed[iDet] = kFALSE;
2897                                 }
2898                         }
2899
2900                 }
2901
2902         }
2903
2904         if (!RetrieveConditionsData(shuttleLogbookEntries))
2905         {
2906                 Log("SHUTTLE", "Collect - Process of at least one run failed");
2907                 return kFALSE;
2908         }
2909
2910         Log("SHUTTLE", "Collect - Requested run(s) successfully processed");
2911         return kTRUE;
2912 }
2913
2914 //______________________________________________________________________________________________
2915 Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries)
2916 {
2917         //
2918         // Retrieve conditions data for all runs that aren't processed yet
2919         //
2920
2921         Bool_t hasError = kFALSE;
2922
2923         TIter iter(&dateEntries);
2924         AliShuttleLogbookEntry* anEntry;
2925
2926         while ((anEntry = (AliShuttleLogbookEntry*) iter.Next())){
2927                 if (!Process(anEntry)){
2928                         hasError = kTRUE;
2929                 }
2930
2931                 // clean SHUTTLE temp directory
2932                 //TString filename = Form("%s/*.shuttle", GetShuttleTempDir());
2933                 //RemoveFile(filename.Data());
2934         }
2935
2936         return hasError == kFALSE;
2937 }
2938
2939 //______________________________________________________________________________________________
2940 ULong_t AliShuttle::GetTimeOfLastAction() const
2941 {
2942         //
2943         // Gets time of last action
2944         //
2945
2946         ULong_t tmp;
2947
2948         fMonitoringMutex->Lock();
2949
2950         tmp = fLastActionTime;
2951
2952         fMonitoringMutex->UnLock();
2953
2954         return tmp;
2955 }
2956
2957 //______________________________________________________________________________________________
2958 const TString AliShuttle::GetLastAction() const
2959 {
2960         //
2961         // returns a string description of the last action
2962         //
2963
2964         TString tmp;
2965
2966         fMonitoringMutex->Lock();
2967         
2968         tmp = fLastAction;
2969         
2970         fMonitoringMutex->UnLock();
2971
2972         return tmp;
2973 }
2974
2975 //______________________________________________________________________________________________
2976 void AliShuttle::SetLastAction(const char* action)
2977 {
2978         //
2979         // updates the monitoring variables
2980         //
2981
2982         fMonitoringMutex->Lock();
2983
2984         fLastAction = action;
2985         fLastActionTime = time(0);
2986         
2987         fMonitoringMutex->UnLock();
2988 }
2989
2990 //______________________________________________________________________________________________
2991 const char* AliShuttle::GetRunParameter(const char* param)
2992 {
2993         //
2994         // returns run parameter read from DAQ logbook
2995         //
2996
2997         if(!fLogbookEntry) {
2998                 AliError("No logbook entry!");
2999                 return 0;
3000         }
3001
3002         return fLogbookEntry->GetRunParameter(param);
3003 }
3004
3005 //______________________________________________________________________________________________
3006 AliCDBEntry* AliShuttle::GetFromOCDB(const char* detector, const AliCDBPath& path)
3007 {
3008         //
3009         // returns object from OCDB valid for current run
3010         //
3011
3012         if (fTestMode & kErrorOCDB)
3013         {
3014                 Log(detector, "GetFromOCDB - In TESTMODE - Simulating error with OCDB");
3015                 return 0;
3016         }
3017         
3018         AliCDBStorage *sto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
3019         if (!sto)
3020         {
3021                 Log(detector, "GetFromOCDB - Cannot activate main OCDB for query!");
3022                 return 0;
3023         }
3024
3025         return dynamic_cast<AliCDBEntry*> (sto->Get(path, GetCurrentRun()));
3026 }
3027
3028 //______________________________________________________________________________________________
3029 Bool_t AliShuttle::SendMail()
3030 {
3031         //
3032         // sends a mail to the subdetector expert in case of preprocessor error
3033         //
3034         
3035         if (fTestMode != kNone)
3036                 return kTRUE;
3037
3038         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
3039         if (dir == NULL)
3040         {
3041                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
3042                 {
3043                         Log("SHUTTLE", Form("SendMail - Can't open directory <%s>", GetShuttleLogDir()));
3044                         return kFALSE;
3045                 }
3046
3047         } else {
3048                 gSystem->FreeDirectory(dir);
3049         }
3050
3051         TString bodyFileName;
3052         bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
3053         gSystem->ExpandPathName(bodyFileName);
3054
3055         ofstream mailBody;
3056         mailBody.open(bodyFileName, ofstream::out);
3057
3058         if (!mailBody.is_open())
3059         {
3060                 Log("SHUTTLE", Form("Could not open mail body file %s", bodyFileName.Data()));
3061                 return kFALSE;
3062         }
3063
3064         TString to="";
3065         TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
3066         TObjString *anExpert=0;
3067         while ((anExpert = (TObjString*) iterExperts.Next()))
3068         {
3069                 to += Form("%s,", anExpert->GetName());
3070         }
3071         to.Remove(to.Length()-1);
3072         AliDebug(2, Form("to: %s",to.Data()));
3073
3074         if (to.IsNull()) {
3075                 Log("SHUTTLE", "List of detector responsibles not yet set!");
3076                 return kFALSE;
3077         }
3078
3079         TString cc="alberto.colla@cern.ch";
3080
3081         TString subject = Form("%s Shuttle preprocessor FAILED in run %d !",
3082                                 fCurrentDetector.Data(), GetCurrentRun());
3083         AliDebug(2, Form("subject: %s", subject.Data()));
3084
3085         TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
3086         body += Form("SHUTTLE just detected that your preprocessor "
3087                         "failed processing run %d!!\n\n", GetCurrentRun());
3088         body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n", 
3089                                 fCurrentDetector.Data());
3090         body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
3091         
3092         TString logFolder = "logs";
3093         if (fConfig->GetRunMode() == AliShuttleConfig::kProd) 
3094                 logFolder += "_PROD";
3095         
3096         
3097         body += Form("Find the %s log for the current run on \n\n"
3098                 "\thttp://pcalishuttle01.cern.ch:8880/%s/%d/%s_%d.log \n\n", 
3099                 fCurrentDetector.Data(), logFolder.Data(), GetCurrentRun(), 
3100                                 fCurrentDetector.Data(), GetCurrentRun());
3101         body += Form("The last 10 lines of %s log file are following:\n\n", fCurrentDetector.Data());
3102
3103         AliDebug(2, Form("Body begin: %s", body.Data()));
3104
3105         mailBody << body.Data();
3106         mailBody.close();
3107         mailBody.open(bodyFileName, ofstream::out | ofstream::app);
3108
3109         TString logFileName = Form("%s/%d/%s_%d.log", GetShuttleLogDir(), 
3110                 GetCurrentRun(), fCurrentDetector.Data(), GetCurrentRun());