5dafd7c3dd6edc854fdbd1a2e2c9a5c176186f22
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttle.cxx
1 /**************************************************************************
2  * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
3  *                                                                        *
4  * Author: The ALICE Off-line Project.                                    *
5  * Contributors are mentioned in the code where appropriate.              *
6  *                                                                        *
7  * Permission to use, copy, modify and distribute this software and its   *
8  * documentation strictly for non-commercial purposes is hereby granted   *
9  * without fee, provided that the above copyright notice appears in all   *
10  * copies and that both the copyright notice and this permission notice   *
11  * appear in the supporting documentation. The authors make no claims     *
12  * about the suitability of this software for any purpose. It is          *
13  * provided "as is" without express or implied warranty.                  *
14  **************************************************************************/
15
16 /*
17 $Log$
18 Revision 1.65  2007/11/26 16:58:37  acolla
19 Monalisa configuration added: host and table name
20
21 Revision 1.64  2007/11/13 16:15:47  acolla
22 DCS map is stored in a file in the temp folder where the detector is processed.
23 If the preprocessor fails, the temp folder is not removed. This will help the debugging of the problem.
24
25 Revision 1.63  2007/11/02 10:53:16  acolla
26 Protection added to AliShuttle::CopyFileLocally
27
28 Revision 1.62  2007/10/31 18:23:13  acolla
29 Furter developement on the Shuttle:
30
31 - Shuttle now connects to the Grid as alidaq. The OCDB and Reference folders
32 are now built from /alice/data, e.g.:
33 /alice/data/2007/LHC07a/OCDB
34
35 the year and LHC period are taken from the Shuttle.
36 Raw metadata files are stored by GRP to:
37 /alice/data/2007/LHC07a/<runNb>/Raw/RunMetadata.root
38
39 - Shuttle sends a mail to DCS experts each time DP retrieval fails.
40
41 Revision 1.61  2007/10/30 20:33:51  acolla
42 Improved managing of temporary folders, which weren't correctly handled.
43 Resolved bug introduced in StoreReferenceFile, which caused SPD preprocessor fail.
44
45 Revision 1.60  2007/10/29 18:06:16  acolla
46
47 New function StoreRunMetadataFile added to preprocessor and Shuttle interface
48 This function can be used by GRP only. It stores raw data tags merged file to the
49 raw data folder (e.g. /alice/data/2008/LHC08a/000099999/Raw).
50
51 KNOWN ISSUES:
52
53 1. Shuttle cannot write to /alice/data/ because it belongs to alidaq. Tag file is stored in /alice/simulation/... for the time being.
54 2. Due to a bug in TAlien::Mkdir, the creation of a folder in recursive mode (-p option) does not work. The problem
55 has been corrected in the root package on the Shuttle machine.
56
57 Revision 1.59  2007/10/05 12:40:55  acolla
58
59 Result error code added to AliDCSClient data members (it was "lost" with the new implementation of TMap* GetAliasValues and GetDPValues).
60
61 Revision 1.58  2007/09/28 15:27:40  acolla
62
63 AliDCSClient "multiSplit" option added in the DCS configuration
64 in AliDCSMessage: variable MAX_BODY_SIZE set to 500000
65
66 Revision 1.57  2007/09/27 16:53:13  acolla
67 Detectors can have more than one AMANDA server. SHUTTLE queries the servers sequentially,
68 merges the dcs aliases/DPs in one TMap and sends it to the preprocessor.
69
70 Revision 1.56  2007/09/14 16:46:14  jgrosseo
71 1) Connect and Close are called before and after each query, so one can
72 keep the same AliDCSClient object.
73 2) The splitting of a query is moved to GetDPValues/GetAliasValues.
74 3) Splitting interval can be specified in constructor
75
76 Revision 1.55  2007/08/06 12:26:40  acolla
77 Function Bool_t GetHLTStatus added to preprocessor. It returns the status of HLT
78 read from the run logbook.
79
80 Revision 1.54  2007/07/12 09:51:25  jgrosseo
81 removed duplicated log message in GetFile
82
83 Revision 1.53  2007/07/12 09:26:28  jgrosseo
84 updating hlt fxs base path
85
86 Revision 1.52  2007/07/12 08:06:45  jgrosseo
87 adding log messages in getfile... functions
88 adding not implemented copy constructor in alishuttleconfigholder
89
90 Revision 1.51  2007/07/03 17:24:52  acolla
91 root moved to v5-16-00. TFileMerger->Cp moved to TFile::Cp.
92
93 Revision 1.50  2007/07/02 17:19:32  acolla
94 preprocessor is run in a temp directory that is removed when process is finished.
95
96 Revision 1.49  2007/06/29 10:45:06  acolla
97 Number of columns in MySql Shuttle logbook increased by one (HLT added)
98
99 Revision 1.48  2007/06/21 13:06:19  acolla
100 GetFileSources returns dummy list with 1 source if system=DCS (better than
101 returning error as it was)
102
103 Revision 1.47  2007/06/19 17:28:56  acolla
104 HLT updated; missing map bug removed.
105
106 Revision 1.46  2007/06/09 13:01:09  jgrosseo
107 Switching to retrieval of several DCS DPs at a time (multiDPrequest)
108
109 Revision 1.45  2007/05/30 06:35:20  jgrosseo
110 Adding functionality to the Shuttle/TestShuttle:
111 o) Function to retrieve list of sources from a given system (GetFileSources with id=0)
112 o) Function to retrieve list of IDs for a given source      (GetFileIDs)
113 These functions are needed for dealing with the tag files that are saved for the GRP preprocessor
114 Example code has been added to the TestProcessor in TestShuttle
115
116 Revision 1.44  2007/05/11 16:09:32  acolla
117 Reference files for ITS, MUON and PHOS are now stored in OfflineDetName/OnlineDetName/run_...
118 example: ITS/SPD/100_filename.root
119
120 Revision 1.43  2007/05/10 09:59:51  acolla
121 Various bug fixes in StoreRefFilesToGrid; Cleaning of reference storage before processing detector (CleanReferenceStorage)
122
123 Revision 1.42  2007/05/03 08:01:39  jgrosseo
124 typo in last commit :-(
125
126 Revision 1.41  2007/05/03 08:00:48  jgrosseo
127 fixing log message when pp want to skip dcs value retrieval
128
129 Revision 1.40  2007/04/27 07:06:48  jgrosseo
130 GetFileSources returns empty list in case of no files, but successful query
131 No mails sent in testmode
132
133 Revision 1.39  2007/04/17 12:43:57  acolla
134 Correction in StoreOCDB; change of text in mail to detector expert
135
136 Revision 1.38  2007/04/12 08:26:18  jgrosseo
137 updated comment
138
139 Revision 1.37  2007/04/10 16:53:14  jgrosseo
140 redirecting sub detector stdout, stderr to sub detector log file
141
142 Revision 1.35  2007/04/04 16:26:38  acolla
143 1. Re-organization of function calls in TestPreprocessor to make it more meaningful.
144 2. Added missing dependency in test preprocessors.
145 3. in AliShuttle.cxx: processing time and memory consumption info on a single line.
146
147 Revision 1.34  2007/04/04 10:33:36  jgrosseo
148 1) Storing of files to the Grid is now done _after_ your preprocessors succeeded. This is transparent, which means that you can still use the same functions (Store, StoreReferenceData) to store files to the Grid. However, the Shuttle first stores them locally and transfers them after the preprocessor finished. The return code of these two functions has changed from UInt_t to Bool_t which gives you the success of the storing.
149 In case of an error with the Grid, the Shuttle will retry the storing later, the preprocessor does not need to be run again.
150
151 2) The meaning of the return code of the preprocessor has changed. 0 is now success and any other value means failure. This value is stored in the log and you can use it to keep details about the error condition.
152
153 3) New function StoreReferenceFile to _directly_ store a file (without opening it) to the reference storage.
154
155 4) The memory usage of the preprocessor is monitored. If it exceeds 2 GB it is terminated.
156
157 5) New function AliPreprocessor::ProcessDCS(). If you do not need to have DCS data in all cases, you can skip the processing by implemting this function and returning kFALSE under certain conditions. E.g. if there is a certain run type.
158 If you always need DCS data (like before), you do not need to implement it.
159
160 6) The run type has been added to the monitoring page
161
162 Revision 1.33  2007/04/03 13:56:01  acolla
163 Grid Storage at the end of preprocessing. Added virtual method to disable DCS query according to the
164 run type.
165
166 Revision 1.32  2007/02/28 10:41:56  acolla
167 Run type field added in SHUTTLE framework. Run type is read from "run type" logbook and retrieved by
168 AliPreprocessor::GetRunType() function.
169 Added some ldap definition files.
170
171 Revision 1.30  2007/02/13 11:23:21  acolla
172 Moved getters and setters of Shuttle's main OCDB/Reference, local
173 OCDB/Reference, temp and log folders to AliShuttleInterface
174
175 Revision 1.27  2007/01/30 17:52:42  jgrosseo
176 adding monalisa monitoring
177
178 Revision 1.26  2007/01/23 19:20:03  acolla
179 Removed old ldif files, added TOF, MCH ldif files. Added some options in
180 AliShuttleConfig::Print. Added in Ali Shuttle: SetShuttleTempDir and
181 SetShuttleLogDir
182
183 Revision 1.25  2007/01/15 19:13:52  acolla
184 Moved some AliInfo to AliDebug in SendMail function
185
186 Revision 1.21  2006/12/07 08:51:26  jgrosseo
187 update (alberto):
188 table, db names in ldap configuration
189 added GRP preprocessor
190 DCS data can also be retrieved by data point
191
192 Revision 1.20  2006/11/16 16:16:48  jgrosseo
193 introducing strict run ordering flag
194 removed giving preprocessor name to preprocessor, they have to know their name themselves ;-)
195
196 Revision 1.19  2006/11/06 14:23:04  jgrosseo
197 major update (Alberto)
198 o) reading of run parameters from the logbook
199 o) online offline naming conversion
200 o) standalone DCSclient package
201
202 Revision 1.18  2006/10/20 15:22:59  jgrosseo
203 o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
204 o) Merging Collect, CollectAll, CollectNew function
205 o) Removing implementation of empty copy constructors (declaration still there!)
206
207 Revision 1.17  2006/10/05 16:20:55  jgrosseo
208 adapting to new CDB classes
209
210 Revision 1.16  2006/10/05 15:46:26  jgrosseo
211 applying to the new interface
212
213 Revision 1.15  2006/10/02 16:38:39  jgrosseo
214 update (alberto):
215 fixed memory leaks
216 storing of objects that failed to be stored to the grid before
217 interfacing of shuttle status table in daq system
218
219 Revision 1.14  2006/08/29 09:16:05  jgrosseo
220 small update
221
222 Revision 1.13  2006/08/15 10:50:00  jgrosseo
223 effc++ corrections (alberto)
224
225 Revision 1.12  2006/08/08 14:19:29  jgrosseo
226 Update to shuttle classes (Alberto)
227
228 - Possibility to set the full object's path in the Preprocessor's and
229 Shuttle's  Store functions
230 - Possibility to extend the object's run validity in the same classes
231 ("startValidity" and "validityInfinite" parameters)
232 - Implementation of the StoreReferenceData function to store reference
233 data in a dedicated CDB storage.
234
235 Revision 1.11  2006/07/21 07:37:20  jgrosseo
236 last run is stored after each run
237
238 Revision 1.10  2006/07/20 09:54:40  jgrosseo
239 introducing status management: The processing per subdetector is divided into several steps,
240 after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
241 can keep track of the number of failures and skips further processing after a certain threshold is
242 exceeded. These thresholds can be configured in LDAP.
243
244 Revision 1.9  2006/07/19 10:09:55  jgrosseo
245 new configuration, accesst to DAQ FES (Alberto)
246
247 Revision 1.8  2006/07/11 12:44:36  jgrosseo
248 adding parameters for extended validity range of data produced by preprocessor
249
250 Revision 1.7  2006/07/10 14:37:09  jgrosseo
251 small fix + todo comment
252
253 Revision 1.6  2006/07/10 13:01:41  jgrosseo
254 enhanced storing of last sucessfully processed run (alberto)
255
256 Revision 1.5  2006/07/04 14:59:57  jgrosseo
257 revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
258
259 Revision 1.4  2006/06/12 09:11:16  jgrosseo
260 coding conventions (Alberto)
261
262 Revision 1.3  2006/06/06 14:26:40  jgrosseo
263 o) removed files that were moved to STEER
264 o) shuttle updated to follow the new interface (Alberto)
265
266 Revision 1.2  2006/03/07 07:52:34  hristov
267 New version (B.Yordanov)
268
269 Revision 1.6  2005/11/19 17:19:14  byordano
270 RetrieveDATEEntries and RetrieveConditionsData added
271
272 Revision 1.5  2005/11/19 11:09:27  byordano
273 AliShuttle declaration added
274
275 Revision 1.4  2005/11/17 17:47:34  byordano
276 TList changed to TObjArray
277
278 Revision 1.3  2005/11/17 14:43:23  byordano
279 import to local CVS
280
281 Revision 1.1.1.1  2005/10/28 07:33:58  hristov
282 Initial import as subdirectory in AliRoot
283
284 Revision 1.2  2005/09/13 08:41:15  byordano
285 default startTime endTime added
286
287 Revision 1.4  2005/08/30 09:13:02  byordano
288 some docs added
289
290 Revision 1.3  2005/08/29 21:15:47  byordano
291 some docs added
292
293 */
294
//
// This class is the main manager for AliShuttle.
// It organizes the data retrieval from DCS and calls the
// interface methods of AliPreprocessor.
// For every detector in AliShuttleConfig (see AliShuttleConfig),
// data for its set of aliases is retrieved. If there is a registered
// AliPreprocessor for this detector then it will be used
// according to the schema (see AliPreprocessor).
// If there isn't a registered AliPreprocessor then the retrieved
// data is stored automatically to the underlying AliCDBStorage.
// The alias name is used for detSpec.
//
307
308 #include "AliShuttle.h"
309
310 #include "AliCDBManager.h"
311 #include "AliCDBStorage.h"
312 #include "AliCDBId.h"
313 #include "AliCDBRunRange.h"
314 #include "AliCDBPath.h"
315 #include "AliCDBEntry.h"
316 #include "AliShuttleConfig.h"
317 #include "DCSClient/AliDCSClient.h"
318 #include "AliLog.h"
319 #include "AliPreprocessor.h"
320 #include "AliShuttleStatus.h"
321 #include "AliShuttleLogbookEntry.h"
322
323 #include <TSystem.h>
324 #include <TObject.h>
325 #include <TString.h>
326 #include <TTimeStamp.h>
327 #include <TObjString.h>
328 #include <TSQLServer.h>
329 #include <TSQLResult.h>
330 #include <TSQLRow.h>
331 #include <TMutex.h>
332 #include <TSystemDirectory.h>
333 #include <TSystemFile.h>
334 #include <TFile.h>
335 #include <TGrid.h>
336 #include <TGridResult.h>
337
338 #include <TMonaLisaWriter.h>
339
340 #include <fstream>
341
342 #include <sys/types.h>
343 #include <sys/wait.h>
344
345 ClassImp(AliShuttle)
346
347 //______________________________________________________________________________________________
348 AliShuttle::AliShuttle(const AliShuttleConfig* config,
349                 UInt_t timeout, Int_t retries):
350 fConfig(config),
351 fTimeout(timeout), fRetries(retries),
352 fPreprocessorMap(),
353 fLogbookEntry(0),
354 fCurrentDetector(),
355 fStatusEntry(0),
356 fMonitoringMutex(0),
357 fLastActionTime(0),
358 fLastAction(),
359 fMonaLisa(0),
360 fTestMode(kNone),
361 fReadTestMode(kFALSE),
362 fOutputRedirected(kFALSE)
363 {
364         //
365         // config: AliShuttleConfig used
366         // timeout: timeout used for AliDCSClient connection
367         // retries: the number of retries in case of connection error.
368         //
369
370         if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
371         for(int iSys=0;iSys<4;iSys++) {
372                 fServer[iSys]=0;
373                 if (iSys < 3)
374                         fFXSlist[iSys].SetOwner(kTRUE);
375         }
376         fPreprocessorMap.SetOwner(kTRUE);
377
378         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
379                 fFirstUnprocessed[iDet] = kFALSE;
380
381         fMonitoringMutex = new TMutex();
382 }
383
384 //______________________________________________________________________________________________
385 AliShuttle::~AliShuttle()
386 {
387         //
388         // destructor
389         //
390
391         fPreprocessorMap.DeleteAll();
392         for(int iSys=0;iSys<4;iSys++)
393                 if(fServer[iSys]) {
394                         fServer[iSys]->Close();
395                         delete fServer[iSys];
396                         fServer[iSys] = 0;
397                 }
398
399         if (fStatusEntry){
400                 delete fStatusEntry;
401                 fStatusEntry = 0;
402         }
403         
404         if (fMonitoringMutex) 
405         {
406                 delete fMonitoringMutex;
407                 fMonitoringMutex = 0;
408         }
409 }
410
411 //______________________________________________________________________________________________
412 void AliShuttle::RegisterPreprocessor(AliPreprocessor* preprocessor)
413 {
414         //
415         // Registers new AliPreprocessor.
416         // It uses GetName() for indentificator of the pre processor.
417         // The pre processor is registered it there isn't any other
418         // with the same identificator (GetName()).
419         //
420
421         const char* detName = preprocessor->GetName();
422         if(GetDetPos(detName) < 0)
423                 AliFatal(Form("********** !!!!! Invalid detector name: %s !!!!! **********", detName));
424
425         if (fPreprocessorMap.GetValue(detName)) {
426                 AliWarning(Form("AliPreprocessor %s is already registered!", detName));
427                 return;
428         }
429
430         fPreprocessorMap.Add(new TObjString(detName), preprocessor);
431 }
432 //______________________________________________________________________________________________
433 Bool_t AliShuttle::Store(const AliCDBPath& path, TObject* object,
434                 AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite)
435 {
436         // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for
437         // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored
438         // using this function. Use StoreReferenceData instead!
439         // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
440         // finishes the data are transferred to the main storage (Grid).
441
442         return StoreLocally(fgkLocalCDB, path, object, metaData, validityStart, validityInfinite);
443 }
444
445 //______________________________________________________________________________________________
446 Bool_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData)
447 {
448         // Stores a CDB object in the storage for reference data. This objects will not be available during
449         // offline reconstrunction. Use this function for reference data only!
450         // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
451         // finishes the data are transferred to the main storage (Grid).
452
453         return StoreLocally(fgkLocalRefStorage, path, object, metaData);
454 }
455
456 //______________________________________________________________________________________________
457 Bool_t AliShuttle::StoreLocally(const TString& localUri,
458                         const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
459                         Int_t validityStart, Bool_t validityInfinite)
460 {
461         // Store object temporarily in local storage. Parameters are passed by Store and StoreReferenceData functions.
462         // when the preprocessor finishes the data are transferred to the main storage (Grid).
463         // The parameters are:
464         //   1) Uri of the backup storage (Local)
465         //   2) the object's path.
466         //   3) the object to be stored
467         //   4) the metaData to be associated with the object
468         //   5) the validity start run number w.r.t. the current run,
469         //      if the data is valid only for this run leave the default 0
470         //   6) specifies if the calibration data is valid for infinity (this means until updated),
471         //      typical for calibration runs, the default is kFALSE
472         //
473         // returns 0 if fail, 1 otherwise
474
475         if (fTestMode & kErrorStorage)
476         {
477                 Log(fCurrentDetector, "StoreLocally - In TESTMODE - Simulating error while storing locally");
478                 return kFALSE;
479         }
480         
481         const char* cdbType = (localUri == fgkLocalCDB) ? "CDB" : "Reference";
482
483         Int_t firstRun = GetCurrentRun() - validityStart;
484         if(firstRun < 0) {
485                 AliWarning("First valid run happens to be less than 0! Setting it to 0.");
486                 firstRun=0;
487         }
488
489         Int_t lastRun = -1;
490         if(validityInfinite) {
491                 lastRun = AliCDBRunRange::Infinity();
492         } else {
493                 lastRun = GetCurrentRun();
494         }
495
496         // Version is set to current run, it will be used later to transfer data to Grid
497         AliCDBId id(path, firstRun, lastRun, GetCurrentRun(), -1);
498
499         if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){
500                 TObjString runUsed = Form("%d", GetCurrentRun());
501                 metaData->SetProperty("RunUsed(TObjString)", runUsed.Clone());
502         }
503
504         Bool_t result = kFALSE;
505
506         if (!(AliCDBManager::Instance()->GetStorage(localUri))) {
507                 Log("SHUTTLE", Form("StoreLocally - Cannot activate local %s storage", cdbType));
508         } else {
509                 result = AliCDBManager::Instance()->GetStorage(localUri)
510                                         ->Put(object, id, metaData);
511         }
512
513         if(!result) {
514
515                 Log(fCurrentDetector, Form("StoreLocally - Can't store object <%s>!", id.ToString().Data()));
516         }
517
518         return result;
519 }
520
521 //______________________________________________________________________________________________
522 Bool_t AliShuttle::StoreOCDB()
523 {
524         //
525         // Called when preprocessor ends successfully or when previous storage attempt failed (kStoreError status)
526         // Calls underlying StoreOCDB(const char*) function twice, for OCDB and Reference storage.
527         // Then calls StoreRefFilesToGrid to store reference files. 
528         //
529         
530         if (fTestMode & kErrorGrid)
531         {
532                 Log("SHUTTLE", "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
533                 Log(fCurrentDetector, "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
534                 return kFALSE;
535         }
536         
537         Log("SHUTTLE","StoreOCDB - Storing OCDB data ...");
538         Bool_t resultCDB = StoreOCDB(fgkMainCDB);
539
540         Log("SHUTTLE","StoreOCDB - Storing reference data ...");
541         Bool_t resultRef = StoreOCDB(fgkMainRefStorage);
542         
543         Log("SHUTTLE","StoreOCDB - Storing reference files ...");
544         Bool_t resultRefFiles = CopyFilesToGrid("reference");
545         
546         Bool_t resultMetadata = kTRUE;
547         if(fCurrentDetector == "GRP") 
548         {
549                 Log("StoreOCDB - SHUTTLE","Storing Run Metadata file ...");
550                 resultMetadata = CopyFilesToGrid("metadata");
551         }
552         
553         return resultCDB && resultRef && resultRefFiles && resultMetadata;
554 }
555
556 //______________________________________________________________________________________________
557 Bool_t AliShuttle::StoreOCDB(const TString& gridURI)
558 {
559         //
560         // Called by StoreOCDB(), performs actual storage to the main OCDB and reference storages (Grid)
561         //
562
563         TObjArray* gridIds=0;
564
565         Bool_t result = kTRUE;
566
567         const char* type = 0;
568         TString localURI;
569         if(gridURI == fgkMainCDB) {
570                 type = "OCDB";
571                 localURI = fgkLocalCDB;
572         } else if(gridURI == fgkMainRefStorage) {
573                 type = "reference";
574                 localURI = fgkLocalRefStorage;
575         } else {
576                 AliError(Form("Invalid storage URI: %s", gridURI.Data()));
577                 return kFALSE;
578         }
579
580         AliCDBManager* man = AliCDBManager::Instance();
581
582         AliCDBStorage *gridSto = man->GetStorage(gridURI);
583         if(!gridSto) {
584                 Log("SHUTTLE",
585                         Form("StoreOCDB - cannot activate main %s storage", type));
586                 return kFALSE;
587         }
588
589         gridIds = gridSto->GetQueryCDBList();
590
591         // get objects previously stored in local CDB
592         AliCDBStorage *localSto = man->GetStorage(localURI);
593         if(!localSto) {
594                 Log("SHUTTLE",
595                         Form("StoreOCDB - cannot activate local %s storage", type));
596                 return kFALSE;
597         }
598         AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*");
599         // Local objects were stored with current run as Grid version!
600         TList* localEntries = localSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun());
601         localEntries->SetOwner(1);
602
603         // loop on local stored objects
604         TIter localIter(localEntries);
605         AliCDBEntry *aLocEntry = 0;
606         while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){
607                 aLocEntry->SetOwner(1);
608                 AliCDBId aLocId = aLocEntry->GetId();
609                 aLocEntry->SetVersion(-1);
610                 aLocEntry->SetSubVersion(-1);
611
612                 // If local object is valid up to infinity we store it only if it is
613                 // the first unprocessed run!
614                 if (aLocId.GetLastRun() == AliCDBRunRange::Infinity() &&
615                         !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
616                 {
617                         Log("SHUTTLE", Form("StoreOCDB - %s: object %s has validity infinite but "
618                                                 "there are previous unprocessed runs!",
619                                                 fCurrentDetector.Data(), aLocId.GetPath().Data()));
620                         continue;
621                 }
622
623                 // loop on Grid valid Id's
624                 Bool_t store = kTRUE;
625                 TIter gridIter(gridIds);
626                 AliCDBId* aGridId = 0;
627                 while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
628                         if(aGridId->GetPath() != aLocId.GetPath()) continue;
629                         // skip all objects valid up to infinity
630                         if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
631                         // if we get here, it means there's already some more recent object stored on Grid!
632                         store = kFALSE;
633                         break;
634                 }
635
636                 // If we get here, the file can be stored!
637                 Bool_t storeOk = gridSto->Put(aLocEntry);
638                 if(!store || storeOk){
639
640                         if (!store)
641                         {
642                                 Log(fCurrentDetector.Data(),
643                                         Form("StoreOCDB - A more recent object already exists in %s storage: <%s>",
644                                                 type, aGridId->ToString().Data()));
645                         } else {
646                                 Log("SHUTTLE",
647                                         Form("StoreOCDB - Object <%s> successfully put into %s storage",
648                                                 aLocId.ToString().Data(), type));
649                                 Log(fCurrentDetector.Data(),
650                                         Form("StoreOCDB - Object <%s> successfully put into %s storage",
651                                                 aLocId.ToString().Data(), type));
652                         }
653
654                         // removing local filename...
655                         TString filename;
656                         localSto->IdToFilename(aLocId, filename);
657                         Log("SHUTTLE", Form("StoreOCDB - Removing local file %s", filename.Data()));
658                         RemoveFile(filename.Data());
659                         continue;
660                 } else  {
661                         Log("SHUTTLE",
662                                 Form("StoreOCDB - Grid %s storage of object <%s> failed",
663                                         type, aLocId.ToString().Data()));
664                         Log(fCurrentDetector.Data(),
665                                 Form("StoreOCDB - Grid %s storage of object <%s> failed",
666                                         type, aLocId.ToString().Data()));
667                         result = kFALSE;
668                 }
669         }
670         localEntries->Clear();
671
672         return result;
673 }
674
675 //______________________________________________________________________________________________
676 Bool_t AliShuttle::CleanReferenceStorage(const char* detector)
677 {
678         // clears the directory used to store reference files of a given subdetector
679   
680         AliCDBManager* man = AliCDBManager::Instance();
681         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
682         TString localBaseFolder = sto->GetBaseFolder();
683
684         TString targetDir = GetRefFilePrefix(localBaseFolder.Data(), detector);
685         
686         Log("SHUTTLE", Form("CleanReferenceStorage - Cleaning %s", targetDir.Data()));
687
688         TString begin;
689         begin.Form("%d_", GetCurrentRun());
690         
691         TSystemDirectory* baseDir = new TSystemDirectory("/", targetDir);
692         if (!baseDir)
693                 return kTRUE;
694                 
695         TList* dirList = baseDir->GetListOfFiles();
696         delete baseDir;
697         
698         if (!dirList) return kTRUE;
699                         
700         if (dirList->GetEntries() < 3) 
701         {
702                 delete dirList;
703                 return kTRUE;
704         }
705                                 
706         Int_t nDirs = 0, nDel = 0;
707         TIter dirIter(dirList);
708         TSystemFile* entry = 0;
709
710         Bool_t success = kTRUE;
711         
712         while ((entry = dynamic_cast<TSystemFile*> (dirIter.Next())))
713         {                                       
714                 if (entry->IsDirectory())
715                         continue;
716                 
717                 TString fileName(entry->GetName());
718                 if (!fileName.BeginsWith(begin))
719                         continue;
720                         
721                 nDirs++;
722                                                 
723                 // delete file
724                 Int_t result = gSystem->Unlink(fileName.Data());
725                 
726                 if (result)
727                 {
728                         Log("SHUTTLE", Form("CleanReferenceStorage - Could not delete file %s!", fileName.Data()));
729                         success = kFALSE;
730                 } else {
731                         nDel++;
732                 }
733         }
734
735         if(nDirs > 0)
736                 Log("SHUTTLE", Form("CleanReferenceStorage - %d (over %d) reference files in folder %s were deleted.", 
737                         nDel, nDirs, targetDir.Data()));
738
739                 
740         delete dirList;
741         return success;
742
743
744
745
746
747
748   Int_t result = gSystem->GetPathInfo(targetDir, 0, (Long64_t*) 0, 0, 0);
749   if (result == 0)
750   {
751     // delete directory
752     result = gSystem->Exec(Form("rm -rf %s", targetDir.Data()));
753     if (result != 0)
754     {  
755       Log("SHUTTLE", Form("CleanReferenceStorage - Could not clean directory %s", targetDir.Data()));
756       return kFALSE;
757     }
758   }
759
760   result = gSystem->mkdir(targetDir, kTRUE);
761   if (result != 0)
762   {
763     Log("SHUTTLE", Form("CleanReferenceStorage - Error creating base directory %s", targetDir.Data()));
764     return kFALSE;
765   }
766         
767   return kTRUE;
768 }
769
770 //______________________________________________________________________________________________
771 Bool_t AliShuttle::StoreReferenceFile(const char* detector, const char* localFile, const char* gridFileName)
772 {
773         //
774         // Stores reference file directly (without opening it). This function stores the file locally.
775         //
776         // The file is stored under the following location: 
777         // <base folder of local reference storage>/<DET>/<RUN#>_<gridFileName>
778         // where <gridFileName> is the second parameter given to the function
779         // 
780         
781         if (fTestMode & kErrorStorage)
782         {
783                 Log(fCurrentDetector, "StoreReferenceFile - In TESTMODE - Simulating error while storing locally");
784                 return kFALSE;
785         }
786         
787         AliCDBManager* man = AliCDBManager::Instance();
788         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
789         
790         TString localBaseFolder = sto->GetBaseFolder();
791         
792         TString target = GetRefFilePrefix(localBaseFolder.Data(), detector);    
793         target.Append(Form("/%d_%s", GetCurrentRun(), gridFileName));
794         
795         return CopyFileLocally(localFile, target);
796 }
797
798 //______________________________________________________________________________________________
799 Bool_t AliShuttle::StoreRunMetadataFile(const char* localFile, const char* gridFileName)
800 {
801         //
802         // Stores Run metadata file to the Grid, in the run folder
803         //
804         // Only GRP can call this function.
805         
806         if (fTestMode & kErrorStorage)
807         {
808                 Log(fCurrentDetector, "StoreRunMetaDataFile - In TESTMODE - Simulating error while storing locally");
809                 return kFALSE;
810         }
811         
812         AliCDBManager* man = AliCDBManager::Instance();
813         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
814         
815         TString localBaseFolder = sto->GetBaseFolder();
816         
817         // Build Run level folder
818         // folder = /alice/data/year/lhcPeriod/runNb/Raw
819         
820                 
821         TString lhcPeriod = GetLHCPeriod();     
822         if (lhcPeriod.Length() == 0) 
823         {
824                 Log("SHUTTLE","StoreRunMetaDataFile - LHCPeriod not found in logbook!");
825                 return 0;
826         }
827         
828         TString target = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/Raw/%s", 
829                                 localBaseFolder.Data(), GetCurrentYear(), 
830                                 lhcPeriod.Data(), GetCurrentRun(), gridFileName);
831                                         
832         return CopyFileLocally(localFile, target);
833 }
834
835 //______________________________________________________________________________________________
836 Bool_t AliShuttle::CopyFileLocally(const char* localFile, const TString& target)
837 {
838         //
839         // Stores file locally. Called by StoreReferenceFile and StoreRunMetadataFile
840         // Files are temporarily stored in the local reference storage. When the preprocessor 
841         // finishes, the Shuttle calls CopyFilesToGrid to transfer the files to AliEn 
842         // (in reference or run level folders)
843         //
844         
845         TString targetDir(target(0, target.Last('/')));
846         
847         //try to open base dir folder, if it does not exist
848         void* dir = gSystem->OpenDirectory(targetDir.Data());
849         if (dir == NULL) {
850                 if (gSystem->mkdir(targetDir.Data(), kTRUE)) {
851                         Log("SHUTTLE", Form("StoreFileLocally - Can't open directory <%s>", targetDir.Data()));
852                         return kFALSE;
853                 }
854
855         } else {
856                 gSystem->FreeDirectory(dir);
857         }
858         
859         Int_t result = 0;
860         
861         result = gSystem->GetPathInfo(localFile, 0, (Long64_t*) 0, 0, 0);
862         if (result)
863         {
864                 Log("SHUTTLE", Form("StoreFileLocally - %s does not exist", localFile));
865                 return kFALSE;
866         }
867
868         result = gSystem->GetPathInfo(target, 0, (Long64_t*) 0, 0, 0);
869         if (!result)
870         {
871                 Log("SHUTTLE", Form("StoreFileLocally - target file %s already exist, removing...", target.Data()));
872                 if (gSystem->Unlink(target.Data()))
873                 {
874                         Log("SHUTTLE", Form("StoreFileLocally - Could not remove existing target file %s!", target.Data()));
875                         return kFALSE;
876                 }
877         }       
878         
879         result = gSystem->CopyFile(localFile, target);
880
881         if (result == 0)
882         {
883                 Log("SHUTTLE", Form("StoreFileLocally - File %s stored locally to %s", localFile, target.Data()));
884                 return kTRUE;
885         }
886         else
887         {
888                 Log("SHUTTLE", Form("StoreFileLocally - Could not store file %s to %s! Error code = %d", 
889                                 localFile, target.Data(), result));
890                 return kFALSE;
891         }       
892
893
894
895 }
896
897 //______________________________________________________________________________________________
898 Bool_t AliShuttle::CopyFilesToGrid(const char* type)
899 {
900         //
901         // Transfers local files to the Grid. Local files can be reference files 
902         // or run metadata file (from GRP only).
903         //
904         // According to the type (ref, metadata) the files are stored under the following location: 
905         // ref --> <base folder of reference storage>/<DET>/<RUN#>_<gridFileName>
906         // metadata --> <run data folder>/<MetadataFileName>
907         //
908                 
909         AliCDBManager* man = AliCDBManager::Instance();
910         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
911         if (!sto)
912                 return kFALSE;
913         TString localBaseFolder = sto->GetBaseFolder();
914         
915         TString dir;
916         TString alienDir;
917         TString begin;
918         
919         if (strcmp(type, "reference") == 0) 
920         {
921                 dir = GetRefFilePrefix(localBaseFolder.Data(), fCurrentDetector.Data());
922                 AliCDBStorage* gridSto = man->GetStorage(fgkMainRefStorage);
923                 if (!gridSto)
924                         return kFALSE;
925                 TString gridBaseFolder = gridSto->GetBaseFolder();
926                 alienDir = GetRefFilePrefix(gridBaseFolder.Data(), fCurrentDetector.Data());
927                 begin = Form("%d_", GetCurrentRun());
928         } 
929         else if (strcmp(type, "metadata") == 0)
930         {
931                         
932                 TString lhcPeriod = GetLHCPeriod();
933         
934                 if (lhcPeriod.Length() == 0) 
935                 {
936                         Log("SHUTTLE","CopyFilesToGrid - LHCPeriod not found in logbook!");
937                         return 0;
938                 }
939                 
940                 dir = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/Raw", 
941                                 localBaseFolder.Data(), GetCurrentYear(), 
942                                 lhcPeriod.Data(), GetCurrentRun());
943                 alienDir = dir(dir.Index("/alice/data/"), dir.Length());
944                 
945                 begin = "";
946         }
947         else 
948         {
949                 Log("SHUTTLE", "CopyFilesToGrid - Unexpected: type label must be reference or metadata!");
950                 return kFALSE;
951         }
952                 
953         TSystemDirectory* baseDir = new TSystemDirectory("/", dir);
954         if (!baseDir)
955                 return kTRUE;
956                 
957         TList* dirList = baseDir->GetListOfFiles();
958         delete baseDir;
959         
960         if (!dirList) return kTRUE;
961                 
962         if (dirList->GetEntries() < 3) 
963         {
964                 delete dirList;
965                 return kTRUE;
966         }
967                         
968         if (!gGrid)
969         { 
970                 Log("SHUTTLE", "CopyFilesToGrid - Connection to Grid failed: Cannot continue!");
971                 delete dirList;
972                 return kFALSE;
973         }
974         
975         Int_t nDirs = 0, nTransfer = 0;
976         TIter dirIter(dirList);
977         TSystemFile* entry = 0;
978
979         Bool_t success = kTRUE;
980         Bool_t first = kTRUE;
981         
982         while ((entry = dynamic_cast<TSystemFile*> (dirIter.Next())))
983         {                       
984                 if (entry->IsDirectory())
985                         continue;
986                         
987                 TString fileName(entry->GetName());
988                 if (!fileName.BeginsWith(begin))
989                         continue;
990                         
991                 nDirs++;
992                         
993                 if (first)
994                 {
995                         first = kFALSE;
996                         // check that folder exists, otherwise create it
997                         TGridResult* result = gGrid->Ls(alienDir.Data(), "a");
998                         
999                         if (!result)
1000                         {
1001                                 delete dirList;
1002                                 return kFALSE;
1003                         }
1004                         
1005                         if (!result->GetFileName(1)) // TODO: It looks like element 0 is always 0!!
1006                         {
1007                                 // TODO It does not work currently! Bug in TAliEn::Mkdir
1008                                 // TODO Manually fixed in local root v5-16-00
1009                                 if (!gGrid->Mkdir(alienDir.Data(),"-p",0))
1010                                 {
1011                                         Log("SHUTTLE", Form("CopyFilesToGrid - Cannot create directory %s",
1012                                                         alienDir.Data()));
1013                                         delete dirList;
1014                                         return kFALSE;
1015                                 } else {
1016                                         Log("SHUTTLE",Form("CopyFilesToGrid - Folder %s created", alienDir.Data()));
1017                                 }
1018                                 
1019                         } else {
1020                                         Log("SHUTTLE",Form("CopyFilesToGrid - Folder %s found", alienDir.Data()));
1021                         }
1022                 }
1023                         
1024                 TString fullLocalPath;
1025                 fullLocalPath.Form("%s/%s", dir.Data(), fileName.Data());
1026                 
1027                 TString fullGridPath;
1028                 fullGridPath.Form("alien://%s/%s", alienDir.Data(), fileName.Data());
1029
1030                 Bool_t result = TFile::Cp(fullLocalPath, fullGridPath);
1031                 
1032                 if (result)
1033                 {
1034                         Log("SHUTTLE", Form("CopyFilesToGrid - Copying local file %s to %s succeeded!", 
1035                                                 fullLocalPath.Data(), fullGridPath.Data()));
1036                         RemoveFile(fullLocalPath);
1037                         nTransfer++;
1038                 }
1039                 else
1040                 {
1041                         Log("SHUTTLE", Form("CopyFilesToGrid - Copying local file %s to %s FAILED!", 
1042                                                 fullLocalPath.Data(), fullGridPath.Data()));
1043                         success = kFALSE;
1044                 }
1045         }
1046
1047         Log("SHUTTLE", Form("CopyFilesToGrid - %d (over %d) files in folder %s copied to Grid.", 
1048                                                 nTransfer, nDirs, dir.Data()));
1049
1050                 
1051         delete dirList;
1052         return success;
1053 }
1054
1055 //______________________________________________________________________________________________
1056 const char* AliShuttle::GetRefFilePrefix(const char* base, const char* detector)
1057 {
1058         //
1059         // Get folder name of reference files 
1060         //
1061
1062         TString offDetStr(GetOfflineDetName(detector));
1063         TString dir;
1064         if (offDetStr == "ITS" || offDetStr == "MUON" || offDetStr == "PHOS")
1065         {
1066                 dir.Form("%s/%s/%s", base, offDetStr.Data(), detector);
1067         } else {
1068                 dir.Form("%s/%s", base, offDetStr.Data());
1069         }
1070         
1071         return dir.Data();
1072         
1073
1074 }
1075
1076 //______________________________________________________________________________________________
1077 void AliShuttle::CleanLocalStorage(const TString& uri)
1078 {
1079         //
1080         // Called in case the preprocessor is declared failed. Remove remaining objects from the local storages.
1081         //
1082
1083         const char* type = 0;
1084         if(uri == fgkLocalCDB) {
1085                 type = "OCDB";
1086         } else if(uri == fgkLocalRefStorage) {
1087                 type = "Reference";
1088         } else {
1089                 AliError(Form("Invalid storage URI: %s", uri.Data()));
1090                 return;
1091         }
1092
1093         AliCDBManager* man = AliCDBManager::Instance();
1094
1095         // open local storage
1096         AliCDBStorage *localSto = man->GetStorage(uri);
1097         if(!localSto) {
1098                 Log("SHUTTLE",
1099                         Form("CleanLocalStorage - cannot activate local %s storage", type));
1100                 return;
1101         }
1102
1103         TString filename(Form("%s/%s/*/Run*_v%d_s*.root",
1104                 localSto->GetBaseFolder().Data(), GetOfflineDetName(fCurrentDetector.Data()), GetCurrentRun()));
1105
1106         AliDebug(2, Form("filename = %s", filename.Data()));
1107
1108         Log("SHUTTLE", Form("Removing remaining local files for run %d and detector %s ...",
1109                 GetCurrentRun(), fCurrentDetector.Data()));
1110
1111         RemoveFile(filename.Data());
1112
1113 }
1114
1115 //______________________________________________________________________________________________
1116 void AliShuttle::RemoveFile(const char* filename)
1117 {
1118         //
1119         // removes local file
1120         //
1121
1122         TString command(Form("rm -f %s", filename));
1123
1124         Int_t result = gSystem->Exec(command.Data());
1125         if(result != 0)
1126         {
1127                 Log("SHUTTLE", Form("RemoveFile - %s: Cannot remove file %s!",
1128                         fCurrentDetector.Data(), filename));
1129         }
1130 }
1131
1132 //______________________________________________________________________________________________
1133 AliShuttleStatus* AliShuttle::ReadShuttleStatus()
1134 {
1135         //
1136         // Reads the AliShuttleStatus from the CDB
1137         //
1138
1139         if (fStatusEntry){
1140                 delete fStatusEntry;
1141                 fStatusEntry = 0;
1142         }
1143
1144         fStatusEntry = AliCDBManager::Instance()->GetStorage(GetLocalCDB())
1145                 ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
1146
1147         if (!fStatusEntry) return 0;
1148         fStatusEntry->SetOwner(1);
1149
1150         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1151         if (!status) {
1152                 AliError("Invalid object stored to CDB!");
1153                 return 0;
1154         }
1155
1156         return status;
1157 }
1158
1159 //______________________________________________________________________________________________
1160 Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status)
1161 {
1162         //
1163         // writes the status for one subdetector
1164         //
1165
1166         if (fStatusEntry){
1167                 delete fStatusEntry;
1168                 fStatusEntry = 0;
1169         }
1170
1171         Int_t run = GetCurrentRun();
1172
1173         AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
1174
1175         fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
1176         fStatusEntry->SetOwner(1);
1177
1178         UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
1179
1180         if (!result) {
1181                 Log("SHUTTLE", Form("WriteShuttleStatus - Failed for %s, run %d",
1182                                                 fCurrentDetector.Data(), run));
1183                 return kFALSE;
1184         }
1185         
1186         SendMLInfo();
1187
1188         return kTRUE;
1189 }
1190
1191 //______________________________________________________________________________________________
1192 void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount)
1193 {
1194         //
1195         // changes the AliShuttleStatus for the given detector and run to the given status
1196         //
1197
1198         if (!fStatusEntry){
1199                 AliError("UNEXPECTED: fStatusEntry empty");
1200                 return;
1201         }
1202
1203         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1204
1205         if (!status){
1206                 Log("SHUTTLE", "UpdateShuttleStatus - UNEXPECTED: status could not be read from current CDB entry");
1207                 return;
1208         }
1209
1210         TString actionStr = Form("UpdateShuttleStatus - %s: Changing state from %s to %s",
1211                                 fCurrentDetector.Data(),
1212                                 status->GetStatusName(),
1213                                 status->GetStatusName(newStatus));
1214         Log("SHUTTLE", actionStr);
1215         SetLastAction(actionStr);
1216
1217         status->SetStatus(newStatus);
1218         if (increaseCount) status->IncreaseCount();
1219
1220         AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
1221
1222         SendMLInfo();
1223 }
1224
1225 //______________________________________________________________________________________________
1226 void AliShuttle::SendMLInfo()
1227 {
1228         //
1229         // sends ML information about the current status of the current detector being processed
1230         //
1231         
1232         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1233         
1234         if (!status){
1235                 Log("SHUTTLE", "SendMLInfo - UNEXPECTED: status could not be read from current CDB entry");
1236                 return;
1237         }
1238         
1239         TMonaLisaText  mlStatus(Form("%s_status", fCurrentDetector.Data()), status->GetStatusName());
1240         TMonaLisaValue mlRetryCount(Form("%s_count", fCurrentDetector.Data()), status->GetCount());
1241
1242         TList mlList;
1243         mlList.Add(&mlStatus);
1244         mlList.Add(&mlRetryCount);
1245
1246         fMonaLisa->SendParameters(&mlList);
1247 }
1248
1249 //______________________________________________________________________________________________
1250 Bool_t AliShuttle::ContinueProcessing()
1251 {
1252         // this function reads the AliShuttleStatus information from CDB and
1253         // checks if the processing should be continued
1254         // if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
1255
1256         if (!fConfig->HostProcessDetector(fCurrentDetector)) return kFALSE;
1257
1258         AliPreprocessor* aPreprocessor =
1259                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
1260         if (!aPreprocessor)
1261         {
1262                 Log("SHUTTLE", Form("ContinueProcessing - %s: no preprocessor registered", fCurrentDetector.Data()));
1263                 return kFALSE;
1264         }
1265
1266         AliShuttleLogbookEntry::Status entryStatus =
1267                 fLogbookEntry->GetDetectorStatus(fCurrentDetector);
1268
1269         if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
1270                 Log("SHUTTLE", Form("ContinueProcessing - %s is %s",
1271                                 fCurrentDetector.Data(),
1272                                 fLogbookEntry->GetDetectorStatusName(entryStatus)));
1273                 return kFALSE;
1274         }
1275
1276         // if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state
1277
1278         // check if current run is first unprocessed run for current detector
1279         if (fConfig->StrictRunOrder(fCurrentDetector) &&
1280                 !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
1281         {
1282                 if (fTestMode == kNone)
1283                 {
1284                         Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering"
1285                                         " but this is not the first unprocessed run!"));
1286                         return kFALSE;
1287                 }
1288                 else
1289                 {
1290                         Log("SHUTTLE", Form("ContinueProcessing - In TESTMODE - "
1291                                         "Although %s requires strict run ordering "
1292                                         "and this is not the first unprocessed run, "
1293                                         "the SHUTTLE continues"));
1294                 }
1295         }
1296
1297         AliShuttleStatus* status = ReadShuttleStatus();
1298         if (!status) {
1299                 // first time
1300                 Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
1301                                 fCurrentDetector.Data()));
1302                 status = new AliShuttleStatus(AliShuttleStatus::kStarted);
1303                 return WriteShuttleStatus(status);
1304         }
1305
1306         // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
1307         // If it happens it may mean Logbook updating failed... let's do it now!
1308         if (status->GetStatus() == AliShuttleStatus::kDone ||
1309             status->GetStatus() == AliShuttleStatus::kFailed){
1310                 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
1311                                         fCurrentDetector.Data(),
1312                                         status->GetStatusName(status->GetStatus())));
1313                 UpdateShuttleLogbook(fCurrentDetector.Data(),
1314                                         status->GetStatusName(status->GetStatus()));
1315                 return kFALSE;
1316         }
1317
1318         if (status->GetStatus() == AliShuttleStatus::kStoreError) {
1319                 Log("SHUTTLE",
1320                         Form("ContinueProcessing - %s: Grid storage of one or more "
1321                                 "objects failed. Trying again now",
1322                                 fCurrentDetector.Data()));
1323                 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
1324                 if (StoreOCDB()){
1325                         Log("SHUTTLE", Form("ContinueProcessing - %s: all objects "
1326                                 "successfully stored into main storage",
1327                                 fCurrentDetector.Data()));
1328                         UpdateShuttleStatus(AliShuttleStatus::kDone);
1329                         UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE");
1330                 } else {
1331                         Log("SHUTTLE",
1332                                 Form("ContinueProcessing - %s: Grid storage failed again",
1333                                         fCurrentDetector.Data()));
1334                         UpdateShuttleStatus(AliShuttleStatus::kStoreError);
1335                 }
1336                 return kFALSE;
1337         }
1338
1339         // if we get here, there is a restart
1340         Bool_t cont = kFALSE;
1341
1342         // abort conditions
1343         if (status->GetCount() >= fConfig->GetMaxRetries()) {
1344                 Log("SHUTTLE", Form("ContinueProcessing - %s failed %d times in status %s - "
1345                                 "Updating Shuttle Logbook", fCurrentDetector.Data(),
1346                                 status->GetCount(), status->GetStatusName()));
1347                 UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
1348                 UpdateShuttleStatus(AliShuttleStatus::kFailed);
1349
1350                 // there may still be objects in local OCDB and reference storage
1351                 // and FXS databases may be not updated: do it now!
1352                 
1353                 // TODO Currently disabled, we want to keep files in case of failure!
1354                 // CleanLocalStorage(fgkLocalCDB);
1355                 // CleanLocalStorage(fgkLocalRefStorage);
1356                 // UpdateTableFailCase();
1357                 
1358                 // Send mail to detector expert!
1359                 Log("SHUTTLE", Form("ContinueProcessing - Sending mail to %s expert...", 
1360                                         fCurrentDetector.Data()));
1361                 if (!SendMail())
1362                         Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert",
1363                                         fCurrentDetector.Data()));
1364
1365         } else {
1366                 Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. "
1367                                 "Aborted before with %s. Retry number %d.", fCurrentDetector.Data(),
1368                                 status->GetStatusName(), status->GetCount()));
1369                 Bool_t increaseCount = kTRUE;
1370                 if (status->GetStatus() == AliShuttleStatus::kDCSError || 
1371                         status->GetStatus() == AliShuttleStatus::kDCSStarted)
1372                                 increaseCount = kFALSE;
1373                                 
1374                 UpdateShuttleStatus(AliShuttleStatus::kStarted, increaseCount);
1375                 cont = kTRUE;
1376         }
1377
1378         return cont;
1379 }
1380
1381 //______________________________________________________________________________________________
1382 Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
1383 {
1384         //
1385         // Makes data retrieval for all detectors in the configuration.
1386         // entry: Shuttle logbook entry, contains run paramenters and status of detectors
1387         // (Unprocessed, Inactive, Failed or Done).
1388         // Returns kFALSE in case of error occured and kTRUE otherwise
1389         //
1390
1391         if (!entry) return kFALSE;
1392
1393         fLogbookEntry = entry;
1394
1395         Log("SHUTTLE", Form("\t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^*",
1396                                         GetCurrentRun()));
1397
1398         // create ML instance that monitors this run
1399         fMonaLisa = new TMonaLisaWriter(fConfig->GetMonitorHost(), fConfig->GetMonitorTable(), Form("%d", GetCurrentRun()));
1400
1401         // Send the information to ML
1402         TMonaLisaText  mlStatus("SHUTTLE_status", "Processing");
1403         TMonaLisaText  mlRunType("SHUTTLE_runtype", Form("%s (%s)", entry->GetRunType(), entry->GetRunParameter("log")));
1404
1405         TList mlList;
1406         mlList.Add(&mlStatus);
1407         mlList.Add(&mlRunType);
1408
1409         fMonaLisa->SendParameters(&mlList);
1410
1411         if (fLogbookEntry->IsDone())
1412         {
1413                 Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook");
1414                 UpdateShuttleLogbook("shuttle_done");
1415                 fLogbookEntry = 0;
1416                 return kTRUE;
1417         }
1418
1419         // read test mode if flag is set
1420         if (fReadTestMode)
1421         {
1422                 fTestMode = kNone;
1423                 TString logEntry(entry->GetRunParameter("log"));
1424                 //printf("log entry = %s\n", logEntry.Data());
1425                 TString searchStr("Testmode: ");
1426                 Int_t pos = logEntry.Index(searchStr.Data());
1427                 //printf("%d\n", pos);
1428                 if (pos >= 0)
1429                 {
1430                         TSubString subStr = logEntry(pos + searchStr.Length(), logEntry.Length());
1431                         //printf("%s\n", subStr.String().Data());
1432                         TString newStr(subStr.Data());
1433                         TObjArray* token = newStr.Tokenize(' ');
1434                         if (token)
1435                         {
1436                                 //token->Print();
1437                                 TObjString* tmpStr = dynamic_cast<TObjString*> (token->First());
1438                                 if (tmpStr)
1439                                 {
1440                                         Int_t testMode = tmpStr->String().Atoi();
1441                                         if (testMode > 0)
1442                                         {
1443                                                 Log("SHUTTLE", Form("Process - Enabling test mode %d", testMode));
1444                                                 SetTestMode((TestMode) testMode);
1445                                         }
1446                                 }
1447                                 delete token;          
1448                         }
1449                 }
1450         }
1451                 
1452         fLogbookEntry->Print("all");
1453
1454         // Initialization
1455         Bool_t hasError = kFALSE;
1456
1457         // Set the CDB and Reference folders according to the year and LHC period
1458         TString lhcPeriod(GetLHCPeriod());
1459         if (lhcPeriod.Length() == 0) 
1460         {
1461                 Log("SHUTTLE","StoreRunMetaDataFile - LHCPeriod not found in logbook!");
1462                 return 0;
1463         }       
1464         
1465         if (fgkMainCDB.Length() == 0)
1466                 fgkMainCDB = Form("alien://folder=/alice/data/%d/%s/OCDB?user=alidaq?cacheFold=/tmp/OCDBCache", 
1467                                         GetCurrentYear(), lhcPeriod.Data());
1468         
1469         if (fgkMainRefStorage.Length() == 0)
1470                 fgkMainRefStorage = Form("alien://folder=/alice/data/%d/%s/Reference?user=alidaq?cacheFold=/tmp/OCDBCache", 
1471                                         GetCurrentYear(), lhcPeriod.Data());
1472         
1473         AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
1474         if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
1475         AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
1476         if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
1477
1478         // Loop on detectors in the configuration
1479         TIter iter(fConfig->GetDetectors());
1480         TObjString* aDetector = 0;
1481
1482         while ((aDetector = (TObjString*) iter.Next()))
1483         {
1484                 fCurrentDetector = aDetector->String();
1485
1486                 if (ContinueProcessing() == kFALSE) continue;
1487
1488                 Log("SHUTTLE", Form("\t\t\t****** run %d - %s: START  ******",
1489                                                 GetCurrentRun(), aDetector->GetName()));
1490
1491                 for(Int_t iSys=0;iSys<3;iSys++) fFXSCalled[iSys]=kFALSE;
1492
1493                 Log(fCurrentDetector.Data(), "Process - Starting processing");
1494
1495                 Int_t pid = fork();
1496
1497                 if (pid < 0)
1498                 {
1499                         Log("SHUTTLE", "Process - ERROR: Forking failed");
1500                 }
1501                 else if (pid > 0)
1502                 {
1503                         // parent
1504                         Log("SHUTTLE", Form("Process - In parent process of %d - %s: Starting monitoring",
1505                                                         GetCurrentRun(), aDetector->GetName()));
1506
1507                         Long_t begin = time(0);
1508
1509                         int status; // to be used with waitpid, on purpose an int (not Int_t)!
1510                         while (waitpid(pid, &status, WNOHANG) == 0)
1511                         {
1512                                 Long_t expiredTime = time(0) - begin;
1513
1514                                 if (expiredTime > fConfig->GetPPTimeOut())
1515                                 {
1516                                         TString tmp;
1517                                         tmp.Form("Process - Process of %s time out. "
1518                                                         "Run time: %d seconds. Killing...",
1519                                                         fCurrentDetector.Data(), expiredTime);
1520                                         Log("SHUTTLE", tmp);
1521                                         Log(fCurrentDetector, tmp);
1522
1523                                         kill(pid, 9);
1524
1525                                         UpdateShuttleStatus(AliShuttleStatus::kPPTimeOut);
1526                                         hasError = kTRUE;
1527
1528                                         gSystem->Sleep(1000);
1529                                 }
1530                                 else
1531                                 {
1532                                         gSystem->Sleep(1000);
1533                                         
1534                                         TString checkStr;
1535                                         checkStr.Form("ps -o vsize --pid %d | tail -n 1", pid);
1536                                         FILE* pipe = gSystem->OpenPipe(checkStr, "r");
1537                                         if (!pipe)
1538                                         {
1539                                                 Log("SHUTTLE", Form("Process - Error: "
1540                                                         "Could not open pipe to %s", checkStr.Data()));
1541                                                 continue;
1542                                         }
1543                                                 
1544                                         char buffer[100];
1545                                         if (!fgets(buffer, 100, pipe))
1546                                         {
1547                                                 Log("SHUTTLE", "Process - Error: ps did not return anything");
1548                                                 gSystem->ClosePipe(pipe);
1549                                                 continue;
1550                                         }
1551                                         gSystem->ClosePipe(pipe);
1552                                         
1553                                         //Log("SHUTTLE", Form("ps returned %s", buffer));
1554                                         
1555                                         Int_t mem = 0;
1556                                         if ((sscanf(buffer, "%d\n", &mem) != 1) || !mem)
1557                                         {
1558                                                 Log("SHUTTLE", "Process - Error: Could not parse output of ps");
1559                                                 continue;
1560                                         }
1561                                         
1562                                         if (expiredTime % 60 == 0)
1563                                                 Log("SHUTTLE", Form("Process - %s: Checking process. "
1564                                                         "Run time: %d seconds - Memory consumption: %d KB",
1565                                                         fCurrentDetector.Data(), expiredTime, mem));
1566                                         
1567                                         if (mem > fConfig->GetPPMaxMem())
1568                                         {
1569                                                 TString tmp;
1570                                                 tmp.Form("Process - Process exceeds maximum allowed memory "
1571                                                         "(%d KB > %d KB). Killing...",
1572                                                         mem, fConfig->GetPPMaxMem());
1573                                                 Log("SHUTTLE", tmp);
1574                                                 Log(fCurrentDetector, tmp);
1575         
1576                                                 kill(pid, 9);
1577         
1578                                                 UpdateShuttleStatus(AliShuttleStatus::kPPOutOfMemory);
1579                                                 hasError = kTRUE;
1580         
1581                                                 gSystem->Sleep(1000);
1582                                         }
1583                                 }
1584                         }
1585
1586                         Log("SHUTTLE", Form("Process - In parent process of %d - %s: Client has terminated.",
1587                                                                 GetCurrentRun(), aDetector->GetName()));
1588
1589                         if (WIFEXITED(status))
1590                         {
1591                                 Int_t returnCode = WEXITSTATUS(status);
1592
1593                                 Log("SHUTTLE", Form("Process - %s: the return code is %d", fCurrentDetector.Data(),
1594                                                                                 returnCode));
1595
1596                                 if (returnCode == 0) hasError = kTRUE;
1597                         }
1598                 }
1599                 else if (pid == 0)
1600                 {
1601                         // client
1602                         Log("SHUTTLE", Form("Process - In client process of %d - %s", GetCurrentRun(),
1603                                 aDetector->GetName()));
1604
1605                         Log("SHUTTLE", Form("Process - Redirecting output to %s log",fCurrentDetector.Data()));
1606
1607                         if ((freopen(GetLogFileName(fCurrentDetector), "a", stdout)) == 0)
1608                         {
1609                                 Log("SHUTTLE", "Process - Could not freopen stdout");
1610                         }
1611                         else
1612                         {
1613                                 fOutputRedirected = kTRUE;
1614                                 if ((dup2(fileno(stdout), fileno(stderr))) < 0)
1615                                         Log("SHUTTLE", "Process - Could not redirect stderr");
1616                                 
1617                         }
1618                         
1619                         TString wd = gSystem->WorkingDirectory();
1620                         TString tmpDir = Form("%s/%s_%d_process", GetShuttleTempDir(), 
1621                                 fCurrentDetector.Data(), GetCurrentRun());
1622                         
1623                         Int_t result = gSystem->GetPathInfo(tmpDir.Data(), 0, (Long64_t*) 0, 0, 0);
1624                         if (!result) // temp dir already exists!
1625                         {
1626                                 Log(fCurrentDetector.Data(), 
1627                                         Form("Process - %s dir already exists! Removing...", tmpDir.Data()));
1628                                 gSystem->Exec(Form("rm -rf %s",tmpDir.Data()));         
1629                         } 
1630                         
1631                         if (gSystem->mkdir(tmpDir.Data(), 1))
1632                         {
1633                                 Log(fCurrentDetector.Data(), "Process - could not make temp directory!!");
1634                                 gSystem->Exit(1);
1635                         }
1636                         
1637                         if (!gSystem->ChangeDirectory(tmpDir.Data())) 
1638                         {
1639                                 Log(fCurrentDetector.Data(), "Process - could not change directory!!");
1640                                 gSystem->Exit(1);                       
1641                         }
1642                         
1643                         Bool_t success = ProcessCurrentDetector();
1644                         
1645                         gSystem->ChangeDirectory(wd.Data());
1646                                                 
1647                         if (success) // Preprocessor finished successfully!
1648                         { 
1649                                 // remove temporary folder
1650                                 gSystem->Exec(Form("rm -rf %s",tmpDir.Data()));
1651                                 
1652                                 // Update time_processed field in FXS DB
1653                                 if (UpdateTable() == kFALSE)
1654                                         Log("SHUTTLE", Form("Process - %s: Could not update FXS databases!", 
1655                                                         fCurrentDetector.Data()));
1656
1657                                 // Transfer the data from local storage to main storage (Grid)
1658                                 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
1659                                 if (StoreOCDB() == kFALSE)
1660                                 {
1661                                         Log("SHUTTLE", 
1662                                                 Form("\t\t\t****** run %d - %s: STORAGE ERROR ******",
1663                                                         GetCurrentRun(), aDetector->GetName()));
1664                                         UpdateShuttleStatus(AliShuttleStatus::kStoreError);
1665                                         success = kFALSE;
1666                                 } else {
1667                                         Log("SHUTTLE", 
1668                                                 Form("\t\t\t****** run %d - %s: DONE ******",
1669                                                         GetCurrentRun(), aDetector->GetName()));
1670                                         UpdateShuttleStatus(AliShuttleStatus::kDone);
1671                                         UpdateShuttleLogbook(fCurrentDetector, "DONE");
1672                                 }
1673                         } else 
1674                         {
1675                                 Log("SHUTTLE", 
1676                                         Form("\t\t\t****** run %d - %s: PP ERROR ******",
1677                                                 GetCurrentRun(), aDetector->GetName()));
1678                         }
1679
1680                         for (UInt_t iSys=0; iSys<3; iSys++)
1681                         {
1682                                 if (fFXSCalled[iSys]) fFXSlist[iSys].Clear();
1683                         }
1684
1685                         Log("SHUTTLE", Form("Process - Client process of %d - %s is exiting now with %d.",
1686                                                         GetCurrentRun(), aDetector->GetName(), success));
1687
1688                         // the client exits here
1689                         gSystem->Exit(success);
1690
1691                         AliError("We should never get here!!!");
1692                 }
1693         }
1694
1695         Log("SHUTTLE", Form("\t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^*",
1696                                                         GetCurrentRun()));
1697
1698         //check if shuttle is done for this run, if so update logbook
1699         TObjArray checkEntryArray;
1700         checkEntryArray.SetOwner(1);
1701         TString whereClause = Form("where run=%d", GetCurrentRun());
1702         if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || checkEntryArray.GetEntries() == 0) {
1703                 Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!",
1704                                                 GetCurrentRun()));
1705                 return hasError == kFALSE;
1706         }
1707
1708         AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*>
1709                                                 (checkEntryArray.At(0));
1710
1711         if (checkEntry)
1712         {
1713                 if (checkEntry->IsDone())
1714                 {
1715                         Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook");
1716                         UpdateShuttleLogbook("shuttle_done");
1717                 }
1718                 else
1719                 {
1720                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1721                         {
1722                                 if (checkEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
1723                                 {
1724                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
1725                                                         checkEntry->GetRun(), GetDetName(iDet)));
1726                                         fFirstUnprocessed[iDet] = kFALSE;
1727                                 }
1728                         }
1729                 }
1730         }
1731
1732         // remove ML instance
1733         delete fMonaLisa;
1734         fMonaLisa = 0;
1735
1736         fLogbookEntry = 0;
1737
1738         return hasError == kFALSE;
1739 }
1740
1741 //______________________________________________________________________________________________
1742 Bool_t AliShuttle::ProcessCurrentDetector()
1743 {
1744         //
1745         // Makes data retrieval just for a specific detector (fCurrentDetector).
1746         // Threre should be a configuration for this detector.
1747
1748         Log("SHUTTLE", Form("ProcessCurrentDetector - Retrieving values for %s, run %d", 
1749                                                 fCurrentDetector.Data(), GetCurrentRun()));
1750
1751         TString wd = gSystem->WorkingDirectory();
1752         
1753         if (!CleanReferenceStorage(fCurrentDetector.Data()))
1754                 return kFALSE;
1755         
1756         gSystem->ChangeDirectory(wd.Data());
1757         
1758         TMap* dcsMap = new TMap();
1759
1760         // call preprocessor
1761         AliPreprocessor* aPreprocessor =
1762                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
1763
1764         aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
1765
1766         Bool_t processDCS = aPreprocessor->ProcessDCS();
1767
1768         if (!processDCS)
1769         {
1770                 Log(fCurrentDetector, "ProcessCurrentDetector -"
1771                         " The preprocessor requested to skip the retrieval of DCS values");
1772         }
1773         else if (fTestMode & kSkipDCS)
1774         {
1775                 Log(fCurrentDetector, "ProcessCurrentDetector - In TESTMODE: Skipping DCS processing");
1776         } 
1777         else if (fTestMode & kErrorDCS)
1778         {
1779                 Log(fCurrentDetector, "ProcessCurrentDetector - In TESTMODE: Simulating DCS error");
1780                 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
1781                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1782                 delete dcsMap;
1783                 return kFALSE;
1784         } else {
1785
1786                 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
1787
1788                 // Query DCS archive
1789                 Int_t nServers = fConfig->GetNServers(fCurrentDetector);
1790                 
1791                 for (int iServ=0; iServ<nServers; iServ++)
1792                 {
1793                 
1794                         TString host(fConfig->GetDCSHost(fCurrentDetector, iServ));
1795                         Int_t port = fConfig->GetDCSPort(fCurrentDetector, iServ);
1796                         Int_t multiSplit = fConfig->GetMultiSplit(fCurrentDetector, iServ);
1797
1798                         Log(fCurrentDetector, Form("ProcessCurrentDetector -"
1799                                         " Querying DCS Amanda server %s:%d (%d of %d)", 
1800                                         host.Data(), port, iServ+1, nServers));
1801                         
1802                         TMap* aliasMap = 0;
1803                         TMap* dpMap = 0;
1804         
1805                         if (fConfig->GetDCSAliases(fCurrentDetector, iServ)->GetEntries() > 0)
1806                         {
1807                                 aliasMap = GetValueSet(host, port, 
1808                                                 fConfig->GetDCSAliases(fCurrentDetector, iServ), 
1809                                                 kAlias, multiSplit);
1810                                 if (!aliasMap)
1811                                 {
1812                                         Log(fCurrentDetector, 
1813                                                 Form("ProcessCurrentDetector -"
1814                                                         " Error retrieving DCS aliases from server %s."
1815                                                         " Sending mail to DCS experts!", host.Data()));
1816                                         UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1817                                         
1818                                         if (!SendMailToDCS())
1819                                                 Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!"));
1820
1821                                         delete dcsMap;
1822                                         return kFALSE;
1823                                 }
1824                         }
1825                         
1826                         if (fConfig->GetDCSDataPoints(fCurrentDetector, iServ)->GetEntries() > 0)
1827                         {
1828                                 dpMap = GetValueSet(host, port, 
1829                                                 fConfig->GetDCSDataPoints(fCurrentDetector, iServ), 
1830                                                 kDP, multiSplit);
1831                                 if (!dpMap)
1832                                 {
1833                                         Log(fCurrentDetector, 
1834                                                 Form("ProcessCurrentDetector -"
1835                                                         " Error retrieving DCS data points from server %s."
1836                                                         " Sending mail to DCS experts!", host.Data()));
1837                                         UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1838                                         
1839                                         if (!SendMailToDCS())
1840                                                 Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!"));
1841                                         
1842                                         if (aliasMap) delete aliasMap;
1843                                         delete dcsMap;
1844                                         return kFALSE;
1845                                 }                               
1846                         }
1847                         
1848                         // merge aliasMap and dpMap into dcsMap
1849                         if(aliasMap) {
1850                                 TIter iter(aliasMap);
1851                                 TObjString* key = 0;
1852                                 while ((key = (TObjString*) iter.Next()))
1853                                         dcsMap->Add(key, aliasMap->GetValue(key->String()));
1854                                 
1855                                 aliasMap->SetOwner(kFALSE);
1856                                 delete aliasMap;
1857                         }       
1858                         
1859                         if(dpMap) {
1860                                 TIter iter(dpMap);
1861                                 TObjString* key = 0;
1862                                 while ((key = (TObjString*) iter.Next()))
1863                                         dcsMap->Add(key, dpMap->GetValue(key->String()));
1864                                 
1865                                 dpMap->SetOwner(kFALSE);
1866                                 delete dpMap;
1867                         }
1868                 }
1869         }
1870         
1871         // save map into file, to help debugging in case of preprocessor error
1872         TFile* f = TFile::Open("DCSMap.root","recreate");
1873         f->cd();
1874         dcsMap->Write("DCSMap", TObject::kSingleKey);
1875         f->Close();
1876         delete f;
1877         
1878         // DCS Archive DB processing successful. Call Preprocessor!
1879         UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
1880
1881         UInt_t returnValue = aPreprocessor->Process(dcsMap);
1882
1883         if (returnValue > 0) // Preprocessor error!
1884         {
1885                 Log(fCurrentDetector, Form("ProcessCurrentDetector - "
1886                                 "Preprocessor failed. Process returned %d.", returnValue));
1887                 UpdateShuttleStatus(AliShuttleStatus::kPPError);
1888                 dcsMap->DeleteAll();
1889                 delete dcsMap;
1890                 return kFALSE;
1891         }
1892         
1893         // preprocessor ok!
1894         UpdateShuttleStatus(AliShuttleStatus::kPPDone);
1895         Log(fCurrentDetector, Form("ProcessCurrentDetector - %s preprocessor returned success",
1896                                 fCurrentDetector.Data()));
1897
1898         dcsMap->DeleteAll();
1899         delete dcsMap;
1900
1901         return kTRUE;
1902 }
1903
1904 //______________________________________________________________________________________________
1905 Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
1906                 TObjArray& entries)
1907 {
1908         // Query DAQ's Shuttle logbook and fills detector status object.
1909         // Call QueryRunParameters to query DAQ logbook for run parameters.
1910         //
1911
1912         entries.SetOwner(1);
1913
1914         // check connection, in case connect
1915         if(!Connect(3)) return kFALSE;
1916
1917         TString sqlQuery;
1918         sqlQuery = Form("select * from %s %s order by run", fConfig->GetShuttlelbTable(), whereClause);
1919
1920         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
1921         if (!aResult) {
1922                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
1923                 return kFALSE;
1924         }
1925
1926         AliDebug(2,Form("Query = %s", sqlQuery.Data()));
1927
1928         if(aResult->GetRowCount() == 0) {
1929                 Log("SHUTTLE", "No entries in Shuttle Logbook match request");
1930                 delete aResult;
1931                 return kTRUE;
1932         }
1933
1934         // TODO Check field count!
1935         const UInt_t nCols = 23;
1936         if (aResult->GetFieldCount() != (Int_t) nCols) {
1937                 Log("SHUTTLE", "Invalid SQL result field number!");
1938                 delete aResult;
1939                 return kFALSE;
1940         }
1941
1942         TSQLRow* aRow;
1943         while ((aRow = aResult->Next())) {
1944                 TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
1945                 Int_t run = runString.Atoi();
1946
1947                 AliShuttleLogbookEntry *entry = QueryRunParameters(run);
1948                 if (!entry)
1949                         continue;
1950
1951                 // loop on detectors
1952                 for(UInt_t ii = 0; ii < nCols; ii++)
1953                         entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii));
1954
1955                 entries.AddLast(entry);
1956                 delete aRow;
1957         }
1958
1959         delete aResult;
1960         return kTRUE;
1961 }
1962
1963 //______________________________________________________________________________________________
1964 AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
1965 {
1966         //
1967         // Retrieve run parameters written in the DAQ logbook and sets them into AliShuttleLogbookEntry object
1968         //
1969
1970         // check connection, in case connect
1971         if (!Connect(3))
1972                 return 0;
1973
1974         TString sqlQuery;
1975         sqlQuery.Form("select * from %s where run=%d", fConfig->GetDAQlbTable(), run);
1976
1977         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
1978         if (!aResult) {
1979                 Log("SHUTTLE", Form("Can't execute query <%s>!", sqlQuery.Data()));
1980                 return 0;
1981         }
1982
1983         if (aResult->GetRowCount() == 0) {
1984                 Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run));
1985                 delete aResult;
1986                 return 0;
1987         }
1988
1989         if (aResult->GetRowCount() > 1) {
1990                 Log("SHUTTLE", Form("QueryRunParameters - UNEXPECTED: "
1991                                 "more than one entry in DAQ Logbook for run %d!", run));
1992                 delete aResult;
1993                 return 0;
1994         }
1995
1996         TSQLRow* aRow = aResult->Next();
1997         if (!aRow)
1998         {
1999                 Log("SHUTTLE", Form("QueryRunParameters - Could not retrieve row for run %d. Skipping", run));
2000                 delete aResult;
2001                 return 0;
2002         }
2003
2004         AliShuttleLogbookEntry* entry = new AliShuttleLogbookEntry(run);
2005
2006         for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
2007                 entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
2008
2009         UInt_t startTime = entry->GetStartTime();
2010         UInt_t endTime = entry->GetEndTime();
2011
2012         if (!startTime || !endTime || startTime > endTime) {
2013                 Log("SHUTTLE",
2014                         Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d",
2015                                 run, startTime, endTime));
2016                 delete entry;
2017                 delete aRow;
2018                 delete aResult;
2019                 return 0;
2020         }
2021
2022         delete aRow;
2023         delete aResult;
2024
2025         return entry;
2026 }
2027
2028 //______________________________________________________________________________________________
2029 TMap* AliShuttle::GetValueSet(const char* host, Int_t port, const TSeqCollection* entries,
2030                               DCSType type, Int_t multiSplit)
2031 {
2032         // Retrieve all "entry" data points from the DCS server
2033         // host, port: TSocket connection parameters
2034         // entries: list of name of the alias or data point
2035         // type: kAlias or kDP
2036         // returns TMap of values, 0 when failure
2037         
2038         AliDCSClient client(host, port, fTimeout, fRetries, multiSplit);
2039
2040         TMap* result = 0;
2041         if (type == kAlias)
2042         {
2043                 result = client.GetAliasValues(entries, GetCurrentStartTime(), 
2044                         GetCurrentEndTime());
2045         } 
2046         else if (type == kDP)
2047         {
2048                 result = client.GetDPValues(entries, GetCurrentStartTime(), 
2049                         GetCurrentEndTime());
2050         }
2051
2052         if (result == 0)
2053         {
2054                 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get entries! Reason: %s",
2055                         client.GetErrorString(client.GetResultErrorCode())));
2056                 if (client.GetResultErrorCode() == AliDCSClient::fgkServerError)        
2057                         Log(fCurrentDetector.Data(), Form("GetValueSet - Server error code: %s",
2058                                 client.GetServerError().Data()));
2059
2060                 return 0;
2061         }
2062                 
2063         return result;
2064 }
2065
2066 //______________________________________________________________________________________________
2067 const char* AliShuttle::GetFile(Int_t system, const char* detector,
2068                 const char* id, const char* source)
2069 {
2070         // Get calibration file from file exchange servers
2071         // First queris the FXS database for the file name, using the run, detector, id and source info
2072         // then calls RetrieveFile(filename) for actual copy to local disk
2073         // run: current run being processed (given by Logbook entry fLogbookEntry)
2074         // detector: the Preprocessor name
2075         // id: provided as a parameter by the Preprocessor
2076         // source: provided by the Preprocessor through GetFileSources function
2077
2078         // check if test mode should simulate a FXS error
2079         if (fTestMode & kErrorFXSFiles)
2080         {
2081                 Log(detector, Form("GetFile - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2082                 return 0;
2083         }
2084         
2085         // check connection, in case connect
2086         if (!Connect(system))
2087         {
2088                 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
2089                 return 0;
2090         }
2091
2092         // Query preparation
2093         TString sourceName(source);
2094         Int_t nFields = 3;
2095         TString sqlQueryStart = Form("select filePath,size,fileChecksum from %s where",
2096                                                                 fConfig->GetFXSdbTable(system));
2097         TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
2098                                                                 GetCurrentRun(), detector, id);
2099
2100         if (system == kDAQ)
2101         {
2102                 whereClause += Form(" and DAQsource=\"%s\"", source);
2103         }
2104         else if (system == kDCS)
2105         {
2106                 sourceName="none";
2107         }
2108         else if (system == kHLT)
2109         {
2110                 whereClause += Form(" and DDLnumbers=\"%s\"", source);
2111                 nFields = 3;
2112         }
2113
2114         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2115
2116         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2117
2118         // Query execution
2119         TSQLResult* aResult = 0;
2120         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2121         if (!aResult) {
2122                 Log(detector, Form("GetFileName - Can't execute SQL query to %s database for: id = %s, source = %s",
2123                                 GetSystemName(system), id, sourceName.Data()));
2124                 return 0;
2125         }
2126
2127         if(aResult->GetRowCount() == 0)
2128         {
2129                 Log(detector,
2130                         Form("GetFileName - No entry in %s FXS db for: id = %s, source = %s",
2131                                 GetSystemName(system), id, sourceName.Data()));
2132                 delete aResult;
2133                 return 0;
2134         }
2135
2136         if (aResult->GetRowCount() > 1) {
2137                 Log(detector,
2138                         Form("GetFileName - More than one entry in %s FXS db for: id = %s, source = %s",
2139                                 GetSystemName(system), id, sourceName.Data()));
2140                 delete aResult;
2141                 return 0;
2142         }
2143
2144         if (aResult->GetFieldCount() != nFields) {
2145                 Log(detector,
2146                         Form("GetFileName - Wrong field count in %s FXS db for: id = %s, source = %s",
2147                                 GetSystemName(system), id, sourceName.Data()));
2148                 delete aResult;
2149                 return 0;
2150         }
2151
2152         TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
2153
2154         if (!aRow){
2155                 Log(detector, Form("GetFileName - Empty set result in %s FXS db from query: id = %s, source = %s",
2156                                 GetSystemName(system), id, sourceName.Data()));
2157                 delete aResult;
2158                 return 0;
2159         }
2160
2161         TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
2162         TString fileSize(aRow->GetField(1), aRow->GetFieldLength(1));
2163         TString fileChecksum(aRow->GetField(2), aRow->GetFieldLength(2));
2164
2165         delete aResult;
2166         delete aRow;
2167
2168         AliDebug(2, Form("filePath = %s; size = %s, fileChecksum = %s",
2169                                 filePath.Data(), fileSize.Data(), fileChecksum.Data()));
2170
2171         // retrieved file is renamed to make it unique
2172         TString localFileName = Form("%s/%s_%d_process/%s_%s_%d_%s_%s.shuttle",
2173                                         GetShuttleTempDir(), detector, GetCurrentRun(),
2174                                         GetSystemName(system), detector, GetCurrentRun(), 
2175                                         id, sourceName.Data());
2176
2177
2178         // file retrieval from FXS
2179         UInt_t nRetries = 0;
2180         UInt_t maxRetries = 3;
2181         Bool_t result = kFALSE;
2182
2183         // copy!! if successful TSystem::Exec returns 0
2184         while(nRetries++ < maxRetries) {
2185                 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
2186                 result = RetrieveFile(system, filePath.Data(), localFileName.Data());
2187                 if(!result)
2188                 {
2189                         Log(detector, Form("GetFileName - Copy of file %s from %s FXS failed",
2190                                         filePath.Data(), GetSystemName(system)));
2191                         continue;
2192                 } 
2193
2194                 if (fileChecksum.Length()>0)
2195                 {
2196                         // compare md5sum of local file with the one stored in the FXS DB
2197                         Int_t md5Comp = gSystem->Exec(Form("md5sum %s |grep %s 2>&1 > /dev/null",
2198                                                 localFileName.Data(), fileChecksum.Data()));
2199
2200                         if (md5Comp != 0)
2201                         {
2202                                 Log(detector, Form("GetFileName - md5sum of file %s does not match with local copy!",
2203                                                         filePath.Data()));
2204                                 result = kFALSE;
2205                                 continue;
2206                         }
2207                 } else {
2208                         Log(fCurrentDetector, Form("GetFile - md5sum of file %s not set in %s database, skipping comparison",
2209                                                         filePath.Data(), GetSystemName(system)));
2210                 }
2211                 if (result) break;
2212         }
2213
2214         if(!result) return 0;
2215
2216         fFXSCalled[system]=kTRUE;
2217         TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, sourceName.Data()));
2218         fFXSlist[system].Add(fileParams);
2219
2220         static TString staticLocalFileName;
2221         staticLocalFileName.Form("%s", localFileName.Data());
2222         
2223         Log(fCurrentDetector, Form("GetFile - Retrieved file with id %s and "
2224                         "source %s from %s to %s", id, source, 
2225                         GetSystemName(system), localFileName.Data()));
2226                         
2227         return staticLocalFileName.Data();
2228 }
2229
2230 //______________________________________________________________________________________________
2231 Bool_t AliShuttle::RetrieveFile(UInt_t system, const char* fxsFileName, const char* localFileName)
2232 {
2233         //
2234         // Copies file from FXS to local Shuttle machine
2235         //
2236
2237         // check temp directory: trying to cd to temp; if it does not exist, create it
2238         AliDebug(2, Form("Copy file %s from %s FXS into %s",
2239                         GetSystemName(system), fxsFileName, localFileName));
2240                         
2241         TString tmpDir(localFileName);
2242         
2243         tmpDir = tmpDir(0,tmpDir.Last('/'));
2244
2245         Int_t noDir = gSystem->GetPathInfo(tmpDir.Data(), 0, (Long64_t*) 0, 0, 0);
2246         if (noDir) // temp dir does not exists!
2247         {
2248                 if (gSystem->mkdir(tmpDir.Data(), 1))
2249                 {
2250                         Log(fCurrentDetector.Data(), "RetrieveFile - could not make temp directory!!");
2251                         return kFALSE;
2252                 }
2253         }
2254
2255         TString baseFXSFolder;
2256         if (system == kDAQ)
2257         {
2258                 baseFXSFolder = "FES/";
2259         }
2260         else if (system == kDCS)
2261         {
2262                 baseFXSFolder = "";
2263         }
2264         else if (system == kHLT)
2265         {
2266                 baseFXSFolder = "/opt/FXS/";
2267         }
2268
2269
2270         TString command = Form("scp -oPort=%d -2 %s@%s:%s%s %s",
2271                 fConfig->GetFXSPort(system),
2272                 fConfig->GetFXSUser(system),
2273                 fConfig->GetFXSHost(system),
2274                 baseFXSFolder.Data(),
2275                 fxsFileName,
2276                 localFileName);
2277
2278         AliDebug(2, Form("%s",command.Data()));
2279
2280         Bool_t result = (gSystem->Exec(command.Data()) == 0);
2281
2282         return result;
2283 }
2284
2285 //______________________________________________________________________________________________
2286 TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
2287 {
2288         //
2289         // Get sources producing the condition file Id from file exchange servers
2290         // if id is NULL all sources are returned (distinct)
2291         //
2292
2293         Log(detector, Form("GetFileSources - Retrieving sources with id %s from %s", id, GetSystemName(system)));
2294         
2295         // check if test mode should simulate a FXS error
2296         if (fTestMode & kErrorFXSSources)
2297         {
2298                 Log(detector, Form("GetFileSources - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2299                 return 0;
2300         }
2301
2302         if (system == kDCS)
2303         {
2304                 Log(detector, "GetFileSources - WARNING: DCS system has only one source of data!");
2305                 TList *list = new TList();
2306                 list->SetOwner(1);
2307                 list->Add(new TObjString(" "));
2308                 return list;
2309         }
2310
2311         // check connection, in case connect
2312         if (!Connect(system))
2313         {
2314                 Log(detector, Form("GetFileSources - Couldn't connect to %s FXS database", GetSystemName(system)));
2315                 return NULL;
2316         }
2317
2318         TString sourceName = 0;
2319         if (system == kDAQ)
2320         {
2321                 sourceName = "DAQsource";
2322         } else if (system == kHLT)
2323         {
2324                 sourceName = "DDLnumbers";
2325         }
2326
2327         TString sqlQueryStart = Form("select distinct %s from %s where", sourceName.Data(), fConfig->GetFXSdbTable(system));
2328         TString whereClause = Form("run=%d and detector=\"%s\"",
2329                                 GetCurrentRun(), detector);
2330         if (id)
2331                 whereClause += Form(" and fileId=\"%s\"", id);
2332         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2333
2334         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2335
2336         // Query execution
2337         TSQLResult* aResult;
2338         aResult = fServer[system]->Query(sqlQuery);
2339         if (!aResult) {
2340                 Log(detector, Form("GetFileSources - Can't execute SQL query to %s database for id: %s",
2341                                 GetSystemName(system), id));
2342                 return 0;
2343         }
2344
2345         TList *list = new TList();
2346         list->SetOwner(1);
2347         
2348         if (aResult->GetRowCount() == 0)
2349         {
2350                 Log(detector,
2351                         Form("GetFileSources - No entry in %s FXS table for id: %s", GetSystemName(system), id));
2352                 delete aResult;
2353                 return list;
2354         }
2355
2356         Log(detector, Form("GetFileSources - Found %d sources", aResult->GetRowCount()));
2357
2358         TSQLRow* aRow;
2359         while ((aRow = aResult->Next()))
2360         {
2361
2362                 TString source(aRow->GetField(0), aRow->GetFieldLength(0));
2363                 AliDebug(2, Form("%s = %s", sourceName.Data(), source.Data()));
2364                 list->Add(new TObjString(source));
2365                 delete aRow;
2366         }
2367
2368         delete aResult;
2369
2370         return list;
2371 }
2372
2373 //______________________________________________________________________________________________
2374 TList* AliShuttle::GetFileIDs(Int_t system, const char* detector, const char* source)
2375 {
2376         //
2377         // Get all ids of condition files produced by a given source from file exchange servers
2378         //
2379         
2380         Log(detector, Form("GetFileIDs - Retrieving ids with source %s with %s", source, GetSystemName(system)));
2381
2382         // check if test mode should simulate a FXS error
2383         if (fTestMode & kErrorFXSSources)
2384         {
2385                 Log(detector, Form("GetFileIDs - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2386                 return 0;
2387         }
2388
2389         // check connection, in case connect
2390         if (!Connect(system))
2391         {
2392                 Log(detector, Form("GetFileIDs - Couldn't connect to %s FXS database", GetSystemName(system)));
2393                 return NULL;
2394         }
2395
2396         TString sourceName = 0;
2397         if (system == kDAQ)
2398         {
2399                 sourceName = "DAQsource";
2400         } else if (system == kHLT)
2401         {
2402                 sourceName = "DDLnumbers";
2403         }
2404
2405         TString sqlQueryStart = Form("select fileId from %s where", fConfig->GetFXSdbTable(system));
2406         TString whereClause = Form("run=%d and detector=\"%s\"",
2407                                 GetCurrentRun(), detector);
2408         if (sourceName.Length() > 0 && source)
2409                 whereClause += Form(" and %s=\"%s\"", sourceName.Data(), source);
2410         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2411
2412         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2413
2414         // Query execution
2415         TSQLResult* aResult;
2416         aResult = fServer[system]->Query(sqlQuery);
2417         if (!aResult) {
2418                 Log(detector, Form("GetFileIDs - Can't execute SQL query to %s database for source: %s",
2419                                 GetSystemName(system), source));
2420                 return 0;
2421         }
2422
2423         TList *list = new TList();
2424         list->SetOwner(1);
2425         
2426         if (aResult->GetRowCount() == 0)
2427         {
2428                 Log(detector,
2429                         Form("GetFileIDs - No entry in %s FXS table for source: %s", GetSystemName(system), source));
2430                 delete aResult;
2431                 return list;
2432         }
2433
2434         Log(detector, Form("GetFileIDs - Found %d ids", aResult->GetRowCount()));
2435
2436         TSQLRow* aRow;
2437
2438         while ((aRow = aResult->Next()))
2439         {
2440
2441                 TString id(aRow->GetField(0), aRow->GetFieldLength(0));
2442                 AliDebug(2, Form("fileId = %s", id.Data()));
2443                 list->Add(new TObjString(id));
2444                 delete aRow;
2445         }
2446
2447         delete aResult;
2448
2449         return list;
2450 }
2451
2452 //______________________________________________________________________________________________
2453 Bool_t AliShuttle::Connect(Int_t system)
2454 {
2455         // Connect to MySQL Server of the system's FXS MySQL databases
2456         // DAQ Logbook, Shuttle Logbook and DAQ FXS db are on the same host
2457         //
2458
2459         // check connection: if already connected return
2460         if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
2461
2462         TString dbHost, dbUser, dbPass, dbName;
2463
2464         if (system < 3) // FXS db servers
2465         {
2466                 dbHost = Form("mysql://%s:%d", fConfig->GetFXSdbHost(system), fConfig->GetFXSdbPort(system));
2467                 dbUser = fConfig->GetFXSdbUser(system);
2468                 dbPass = fConfig->GetFXSdbPass(system);
2469                 dbName =   fConfig->GetFXSdbName(system);
2470         } else { // Run & Shuttle logbook servers
2471         // TODO Will the Shuttle logbook server be the same as the Run logbook server ???
2472                 dbHost = Form("mysql://%s:%d", fConfig->GetDAQlbHost(), fConfig->GetDAQlbPort());
2473                 dbUser = fConfig->GetDAQlbUser();
2474                 dbPass = fConfig->GetDAQlbPass();
2475                 dbName =   fConfig->GetDAQlbDB();
2476         }
2477
2478         fServer[system] = TSQLServer::Connect(dbHost.Data(), dbUser.Data(), dbPass.Data());
2479         if (!fServer[system] || !fServer[system]->IsConnected()) {
2480                 if(system < 3)
2481                 {
2482                 AliError(Form("Can't establish connection to FXS database for %s",
2483                                         AliShuttleInterface::GetSystemName(system)));
2484                 } else {
2485                 AliError("Can't establish connection to Run logbook.");
2486                 }
2487                 if(fServer[system]) delete fServer[system];
2488                 return kFALSE;
2489         }
2490
2491         // Get tables
2492         TSQLResult* aResult=0;
2493         switch(system){
2494                 case kDAQ:
2495                         aResult = fServer[kDAQ]->GetTables(dbName.Data());
2496                         break;
2497                 case kDCS:
2498                         aResult = fServer[kDCS]->GetTables(dbName.Data());
2499                         break;
2500                 case kHLT:
2501                         aResult = fServer[kHLT]->GetTables(dbName.Data());
2502                         break;
2503                 default:
2504                         aResult = fServer[3]->GetTables(dbName.Data());
2505                         break;
2506         }
2507
2508         delete aResult;
2509         return kTRUE;
2510 }
2511
2512 //______________________________________________________________________________________________
2513 Bool_t AliShuttle::UpdateTable()
2514 {
2515         //
2516         // Update FXS table filling time_processed field in all rows corresponding to current run and detector
2517         //
2518
2519         Bool_t result = kTRUE;
2520
2521         for (UInt_t system=0; system<3; system++)
2522         {
2523                 if(!fFXSCalled[system]) continue;
2524
2525                 // check connection, in case connect
2526                 if (!Connect(system))
2527                 {
2528                         Log(fCurrentDetector, Form("UpdateTable - Couldn't connect to %s FXS database", GetSystemName(system)));
2529                         result = kFALSE;
2530                         continue;
2531                 }
2532
2533                 TTimeStamp now; // now
2534
2535                 // Loop on FXS list entries
2536                 TIter iter(&fFXSlist[system]);
2537                 TObjString *aFXSentry=0;
2538                 while ((aFXSentry = dynamic_cast<TObjString*> (iter.Next())))
2539                 {
2540                         TString aFXSentrystr = aFXSentry->String();
2541                         TObjArray *aFXSarray = aFXSentrystr.Tokenize("#!?!#");
2542                         if (!aFXSarray || aFXSarray->GetEntries() != 2 )
2543                         {
2544                                 Log(fCurrentDetector, Form("UpdateTable - error updating %s FXS entry. Check string: <%s>",
2545                                         GetSystemName(system), aFXSentrystr.Data()));
2546                                 if(aFXSarray) delete aFXSarray;
2547                                 result = kFALSE;
2548                                 continue;
2549                         }
2550                         const char* fileId = ((TObjString*) aFXSarray->At(0))->GetName();
2551                         const char* source = ((TObjString*) aFXSarray->At(1))->GetName();
2552
2553                         TString whereClause;
2554                         if (system == kDAQ)
2555                         {
2556                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
2557                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
2558                         }
2559                         else if (system == kDCS)
2560                         {
2561                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\";",
2562                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId);
2563                         }
2564                         else if (system == kHLT)
2565                         {
2566                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\";",
2567                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
2568                         }
2569
2570                         delete aFXSarray;
2571
2572                         TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
2573                                                                 now.GetSec(), whereClause.Data());
2574
2575                         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2576
2577                         // Query execution
2578                         TSQLResult* aResult;
2579                         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2580                         if (!aResult)
2581                         {
2582                                 Log(fCurrentDetector, Form("UpdateTable - %s db: can't execute SQL query <%s>",
2583                                                                 GetSystemName(system), sqlQuery.Data()));
2584                                 result = kFALSE;
2585                                 continue;
2586                         }
2587                         delete aResult;
2588                 }
2589         }
2590
2591         return result;
2592 }
2593
2594 //______________________________________________________________________________________________
2595 Bool_t AliShuttle::UpdateTableFailCase()
2596 {
2597         // Update FXS table filling time_processed field in all rows corresponding to current run and detector
2598         // this is called in case the preprocessor is declared failed for the current run, because
2599         // the fields are updated only in case of success
2600
2601         Bool_t result = kTRUE;
2602
2603         for (UInt_t system=0; system<3; system++)
2604         {
2605                 // check connection, in case connect
2606                 if (!Connect(system))
2607                 {
2608                         Log(fCurrentDetector, Form("UpdateTableFailCase - Couldn't connect to %s FXS database",
2609                                                         GetSystemName(system)));
2610                         result = kFALSE;
2611                         continue;
2612                 }
2613
2614                 TTimeStamp now; // now
2615
2616                 // Loop on FXS list entries
2617
2618                 TString whereClause = Form("where run=%d and detector=\"%s\";",
2619                                                 GetCurrentRun(), fCurrentDetector.Data());
2620
2621
2622                 TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
2623                                                         now.GetSec(), whereClause.Data());
2624
2625                 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2626
2627                 // Query execution
2628                 TSQLResult* aResult;
2629                 aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2630                 if (!aResult)
2631                 {
2632                         Log(fCurrentDetector, Form("UpdateTableFailCase - %s db: can't execute SQL query <%s>",
2633                                                         GetSystemName(system), sqlQuery.Data()));
2634                         result = kFALSE;
2635                         continue;
2636                 }
2637                 delete aResult;
2638         }
2639
2640         return result;
2641 }
2642
2643 //______________________________________________________________________________________________
2644 Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
2645 {
2646         //
2647         // Update Shuttle logbook filling detector or shuttle_done column
2648         // ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
2649         //
2650
2651         // check connection, in case connect
2652         if(!Connect(3)){
2653                 Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook.");
2654                 return kFALSE;
2655         }
2656
2657         TString detName(detector);
2658         TString setClause;
2659         if(detName == "shuttle_done")
2660         {
2661                 setClause = "set shuttle_done=1";
2662
2663                 // Send the information to ML
2664                 TMonaLisaText  mlStatus("SHUTTLE_status", "Done");
2665
2666                 TList mlList;
2667                 mlList.Add(&mlStatus);
2668
2669                 fMonaLisa->SendParameters(&mlList);
2670         } else {
2671                 TString statusStr(status);
2672                 if(statusStr.Contains("done", TString::kIgnoreCase) ||
2673                    statusStr.Contains("failed", TString::kIgnoreCase)){
2674                         setClause = Form("set %s=\"%s\"", detector, status);
2675                 } else {
2676                         Log("SHUTTLE",
2677                                 Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s",
2678                                         status, detector));
2679                         return kFALSE;
2680                 }
2681         }
2682
2683         TString whereClause = Form("where run=%d", GetCurrentRun());
2684
2685         TString sqlQuery = Form("update %s %s %s",
2686                                         fConfig->GetShuttlelbTable(), setClause.Data(), whereClause.Data());
2687
2688         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2689
2690         // Query execution
2691         TSQLResult* aResult;
2692         aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
2693         if (!aResult) {
2694                 Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data()));
2695                 return kFALSE;
2696         }
2697         delete aResult;
2698
2699         return kTRUE;
2700 }
2701
2702 //______________________________________________________________________________________________
2703 Int_t AliShuttle::GetCurrentRun() const
2704 {
2705         //
2706         // Get current run from logbook entry
2707         //
2708
2709         return fLogbookEntry ? fLogbookEntry->GetRun() : -1;
2710 }
2711
2712 //______________________________________________________________________________________________
2713 UInt_t AliShuttle::GetCurrentStartTime() const
2714 {
2715         //
2716         // get current start time
2717         //
2718
2719         return fLogbookEntry ? fLogbookEntry->GetStartTime() : 0;
2720 }
2721
2722 //______________________________________________________________________________________________
2723 UInt_t AliShuttle::GetCurrentEndTime() const
2724 {
2725         //
2726         // get current end time from logbook entry
2727         //
2728
2729         return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
2730 }
2731
2732 //______________________________________________________________________________________________
2733 UInt_t AliShuttle::GetCurrentYear() const
2734 {
2735         //
2736         // Get current year from logbook entry
2737         //
2738
2739         if (!fLogbookEntry) return 0;
2740         
2741         TTimeStamp startTime(GetCurrentStartTime());
2742         TString year =  Form("%d",startTime.GetDate());
2743         year = year(0,4);
2744         
2745         return year.Atoi();
2746 }
2747
2748 //______________________________________________________________________________________________
2749 const char* AliShuttle::GetLHCPeriod() const
2750 {
2751         //
2752         // Get current LHC period from logbook entry
2753         //
2754
2755         if (!fLogbookEntry) return 0;
2756                 
2757         return fLogbookEntry->GetRunParameter("LHCperiod");
2758 }
2759
2760 //______________________________________________________________________________________________
2761 void AliShuttle::Log(const char* detector, const char* message)
2762 {
2763         //
2764         // Fill log string with a message
2765         //
2766
2767         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
2768         if (dir == NULL) {
2769                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE)) {
2770                         AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
2771                         return;
2772                 }
2773
2774         } else {
2775                 gSystem->FreeDirectory(dir);
2776         }
2777
2778         TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
2779         if (GetCurrentRun() >= 0) 
2780                 toLog += Form("run %d - ", GetCurrentRun());
2781         toLog += Form("%s", message);
2782
2783         AliInfo(toLog.Data());
2784         
2785         // if we redirect the log output already to the file, leave here
2786         if (fOutputRedirected && strcmp(detector, "SHUTTLE") != 0)
2787                 return;
2788
2789         TString fileName = GetLogFileName(detector);
2790         
2791         gSystem->ExpandPathName(fileName);
2792
2793         ofstream logFile;
2794         logFile.open(fileName, ofstream::out | ofstream::app);
2795
2796         if (!logFile.is_open()) {
2797                 AliError(Form("Could not open file %s", fileName.Data()));
2798                 return;
2799         }
2800
2801         logFile << toLog.Data() << "\n";
2802
2803         logFile.close();
2804 }
2805
2806 //______________________________________________________________________________________________
2807 TString AliShuttle::GetLogFileName(const char* detector) const
2808 {
2809         // 
2810         // returns the name of the log file for a given sub detector
2811         //
2812         
2813         TString fileName;
2814         
2815         if (GetCurrentRun() >= 0) 
2816                 fileName.Form("%s/%s_%d.log", GetShuttleLogDir(), detector, GetCurrentRun());
2817         else
2818                 fileName.Form("%s/%s.log", GetShuttleLogDir(), detector);
2819
2820         return fileName;
2821 }
2822
2823 //______________________________________________________________________________________________
2824 Bool_t AliShuttle::Collect(Int_t run)
2825 {
2826         //
2827         // Collects conditions data for all UNPROCESSED run written to DAQ LogBook in case of run = -1 (default)
2828         // If a dedicated run is given this run is processed
2829         //
2830         // In operational mode, this is the Shuttle function triggered by the EOR signal.
2831         //
2832
2833         if (run == -1)
2834                 Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs");
2835         else
2836                 Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run));
2837
2838         SetLastAction("Starting");
2839
2840         TString whereClause("where shuttle_done=0");
2841         if (run != -1)
2842                 whereClause += Form(" and run=%d", run);
2843
2844         TObjArray shuttleLogbookEntries;
2845         if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries))
2846         {
2847                 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
2848                 return kFALSE;
2849         }
2850
2851         if (shuttleLogbookEntries.GetEntries() == 0)
2852         {
2853                 if (run == -1)
2854                         Log("SHUTTLE","Collect - Found no UNPROCESSED runs in Shuttle logbook");
2855                 else
2856                         Log("SHUTTLE", Form("Collect - Run %d is already DONE "
2857                                                 "or it does not exist in Shuttle logbook", run));
2858                 return kTRUE;
2859         }
2860
2861         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
2862                 fFirstUnprocessed[iDet] = kTRUE;
2863
2864         if (run != -1)
2865         {
2866                 // query Shuttle logbook for earlier runs, check if some detectors are unprocessed,
2867                 // flag them into fFirstUnprocessed array
2868                 TString whereClause(Form("where shuttle_done=0 and run < %d", run));
2869                 TObjArray tmpLogbookEntries;
2870                 if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries))
2871                 {
2872                         Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
2873                         return kFALSE;
2874                 }
2875
2876                 TIter iter(&tmpLogbookEntries);
2877                 AliShuttleLogbookEntry* anEntry = 0;
2878                 while ((anEntry = dynamic_cast<AliShuttleLogbookEntry*> (iter.Next())))
2879                 {
2880                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
2881                         {
2882                                 if (anEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
2883                                 {
2884                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
2885                                                         anEntry->GetRun(), GetDetName(iDet)));
2886                                         fFirstUnprocessed[iDet] = kFALSE;
2887                                 }
2888                         }
2889
2890                 }
2891
2892         }
2893
2894         if (!RetrieveConditionsData(shuttleLogbookEntries))
2895         {
2896                 Log("SHUTTLE", "Collect - Process of at least one run failed");
2897                 return kFALSE;
2898         }
2899
2900         Log("SHUTTLE", "Collect - Requested run(s) successfully processed");
2901         return kTRUE;
2902 }
2903
2904 //______________________________________________________________________________________________
2905 Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries)
2906 {
2907         //
2908         // Retrieve conditions data for all runs that aren't processed yet
2909         //
2910
2911         Bool_t hasError = kFALSE;
2912
2913         TIter iter(&dateEntries);
2914         AliShuttleLogbookEntry* anEntry;
2915
2916         while ((anEntry = (AliShuttleLogbookEntry*) iter.Next())){
2917                 if (!Process(anEntry)){
2918                         hasError = kTRUE;
2919                 }
2920
2921                 // clean SHUTTLE temp directory
2922                 //TString filename = Form("%s/*.shuttle", GetShuttleTempDir());
2923                 //RemoveFile(filename.Data());
2924         }
2925
2926         return hasError == kFALSE;
2927 }
2928
2929 //______________________________________________________________________________________________
2930 ULong_t AliShuttle::GetTimeOfLastAction() const
2931 {
2932         //
2933         // Gets time of last action
2934         //
2935
2936         ULong_t tmp;
2937
2938         fMonitoringMutex->Lock();
2939
2940         tmp = fLastActionTime;
2941
2942         fMonitoringMutex->UnLock();
2943
2944         return tmp;
2945 }
2946
2947 //______________________________________________________________________________________________
2948 const TString AliShuttle::GetLastAction() const
2949 {
2950         //
2951         // returns a string description of the last action
2952         //
2953
2954         TString tmp;
2955
2956         fMonitoringMutex->Lock();
2957         
2958         tmp = fLastAction;
2959         
2960         fMonitoringMutex->UnLock();
2961
2962         return tmp;
2963 }
2964
2965 //______________________________________________________________________________________________
2966 void AliShuttle::SetLastAction(const char* action)
2967 {
2968         //
2969         // updates the monitoring variables
2970         //
2971
2972         fMonitoringMutex->Lock();
2973
2974         fLastAction = action;
2975         fLastActionTime = time(0);
2976         
2977         fMonitoringMutex->UnLock();
2978 }
2979
2980 //______________________________________________________________________________________________
2981 const char* AliShuttle::GetRunParameter(const char* param)
2982 {
2983         //
2984         // returns run parameter read from DAQ logbook
2985         //
2986
2987         if(!fLogbookEntry) {
2988                 AliError("No logbook entry!");
2989                 return 0;
2990         }
2991
2992         return fLogbookEntry->GetRunParameter(param);
2993 }
2994
2995 //______________________________________________________________________________________________
2996 AliCDBEntry* AliShuttle::GetFromOCDB(const char* detector, const AliCDBPath& path)
2997 {
2998         //
2999         // returns object from OCDB valid for current run
3000         //
3001
3002         if (fTestMode & kErrorOCDB)
3003         {
3004                 Log(detector, "GetFromOCDB - In TESTMODE - Simulating error with OCDB");
3005                 return 0;
3006         }
3007         
3008         AliCDBStorage *sto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
3009         if (!sto)
3010         {
3011                 Log(detector, "GetFromOCDB - Cannot activate main OCDB for query!");
3012                 return 0;
3013         }
3014
3015         return dynamic_cast<AliCDBEntry*> (sto->Get(path, GetCurrentRun()));
3016 }
3017
3018 //______________________________________________________________________________________________
3019 Bool_t AliShuttle::SendMail()
3020 {
3021         //
3022         // sends a mail to the subdetector expert in case of preprocessor error
3023         //
3024         
3025         if (fTestMode != kNone)
3026                 return kTRUE;
3027
3028         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
3029         if (dir == NULL)
3030         {
3031                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
3032                 {
3033                         Log("SHUTTLE", Form("SendMail - Can't open directory <%s>", GetShuttleLogDir()));
3034                         return kFALSE;
3035                 }
3036
3037         } else {
3038                 gSystem->FreeDirectory(dir);
3039         }
3040
3041         TString bodyFileName;
3042         bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
3043         gSystem->ExpandPathName(bodyFileName);
3044
3045         ofstream mailBody;
3046         mailBody.open(bodyFileName, ofstream::out);
3047
3048         if (!mailBody.is_open())
3049         {
3050                 Log("SHUTTLE", Form("Could not open mail body file %s", bodyFileName.Data()));
3051                 return kFALSE;
3052         }
3053
3054         TString to="";
3055         TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
3056         TObjString *anExpert=0;
3057         while ((anExpert = (TObjString*) iterExperts.Next()))
3058         {
3059                 to += Form("%s,", anExpert->GetName());
3060         }
3061         to.Remove(to.Length()-1);
3062         AliDebug(2, Form("to: %s",to.Data()));
3063
3064         if (to.IsNull()) {
3065                 Log("SHUTTLE", "List of detector responsibles not yet set!");
3066                 return kFALSE;
3067         }
3068
3069         TString cc="alberto.colla@cern.ch";
3070
3071         TString subject = Form("%s Shuttle preprocessor FAILED in run %d !",
3072                                 fCurrentDetector.Data(), GetCurrentRun());
3073         AliDebug(2, Form("subject: %s", subject.Data()));
3074
3075         TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
3076         body += Form("SHUTTLE just detected that your preprocessor "
3077                         "failed processing run %d!!\n\n", GetCurrentRun());
3078         body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n", fCurrentDetector.Data());
3079         body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
3080         body += Form("Find the %s log for the current run on \n\n"
3081                 "\thttp://pcalishuttle01.cern.ch:8880/logs/%s_%d.log \n\n", 
3082                 fCurrentDetector.Data(), fCurrentDetector.Data(), GetCurrentRun());
3083         body += Form("The last 10 lines of %s log file are following:\n\n");
3084
3085         AliDebug(2, Form("Body begin: %s", body.Data()));
3086
3087         mailBody << body.Data();
3088         mailBody.close();
3089         mailBody.open(bodyFileName, ofstream::out | ofstream::app);
3090
3091         TString logFileName = Form("%s/%s_%d.log", GetShuttleLogDir(), fCurrentDetector.Data(), GetCurrentRun());
3092         TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
3093         if (gSystem->Exec(tailCommand.Data()))
3094         {
3095                 mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
3096         }
3097
3098         TString endBody = Form("------------------------------------------------------\n\n");
3099         endBody += Form("In case of problems please contact the SHUTTLE core team.\n\n");
3100         endBody += "Please do not answer this message directly, it is automatically generated.\n\n";
3101         endBody += "Greetings,\n\n \t\t\tthe SHUTTLE\n";
3102
3103         AliDebug(2, Form("Body end: %s", endBody.Data()));
3104
3105         mailBody << endBody.Data();
3106
3107         mailBody.close();
3108
3109         // send mail!
3110         TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
3111  &n