]> git.uio.no Git - u/mrichter/AliRoot.git/blob - SHUTTLE/AliShuttle.cxx
75e6e505ee40818b1586305167af25cec949665d
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttle.cxx
1 /**************************************************************************
2  * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
3  *                                                                        *
4  * Author: The ALICE Off-line Project.                                    *
5  * Contributors are mentioned in the code where appropriate.              *
6  *                                                                        *
7  * Permission to use, copy, modify and distribute this software and its   *
8  * documentation strictly for non-commercial purposes is hereby granted   *
9  * without fee, provided that the above copyright notice appears in all   *
10  * copies and that both the copyright notice and this permission notice   *
11  * appear in the supporting documentation. The authors make no claims     *
12  * about the suitability of this software for any purpose. It is          *
13  * provided "as is" without express or implied warranty.                  *
14  **************************************************************************/
15
16 /*
17 $Log$
18 Revision 1.50  2007/07/02 17:19:32  acolla
19 preprocessor is run in a temp directory that is removed when process is finished.
20
21 Revision 1.49  2007/06/29 10:45:06  acolla
22 Number of columns in MySql Shuttle logbook increased by one (HLT added)
23
24 Revision 1.48  2007/06/21 13:06:19  acolla
25 GetFileSources returns dummy list with 1 source if system=DCS (better than
26 returning error as it was)
27
28 Revision 1.47  2007/06/19 17:28:56  acolla
29 HLT updated; missing map bug removed.
30
31 Revision 1.46  2007/06/09 13:01:09  jgrosseo
32 Switching to retrieval of several DCS DPs at a time (multiDPrequest)
33
34 Revision 1.45  2007/05/30 06:35:20  jgrosseo
35 Adding functionality to the Shuttle/TestShuttle:
36 o) Function to retrieve list of sources from a given system (GetFileSources with id=0)
37 o) Function to retrieve list of IDs for a given source      (GetFileIDs)
38 These functions are needed for dealing with the tag files that are saved for the GRP preprocessor
39 Example code has been added to the TestProcessor in TestShuttle
40
41 Revision 1.44  2007/05/11 16:09:32  acolla
42 Reference files for ITS, MUON and PHOS are now stored in OfflineDetName/OnlineDetName/run_...
43 example: ITS/SPD/100_filename.root
44
45 Revision 1.43  2007/05/10 09:59:51  acolla
46 Various bug fixes in StoreRefFilesToGrid; Cleaning of reference storage before processing detector (CleanReferenceStorage)
47
48 Revision 1.42  2007/05/03 08:01:39  jgrosseo
49 typo in last commit :-(
50
51 Revision 1.41  2007/05/03 08:00:48  jgrosseo
52 fixing log message when pp want to skip dcs value retrieval
53
54 Revision 1.40  2007/04/27 07:06:48  jgrosseo
55 GetFileSources returns empty list in case of no files, but successful query
56 No mails sent in testmode
57
58 Revision 1.39  2007/04/17 12:43:57  acolla
59 Correction in StoreOCDB; change of text in mail to detector expert
60
61 Revision 1.38  2007/04/12 08:26:18  jgrosseo
62 updated comment
63
64 Revision 1.37  2007/04/10 16:53:14  jgrosseo
65 redirecting sub detector stdout, stderr to sub detector log file
66
67 Revision 1.35  2007/04/04 16:26:38  acolla
68 1. Re-organization of function calls in TestPreprocessor to make it more meaningful.
69 2. Added missing dependency in test preprocessors.
70 3. in AliShuttle.cxx: processing time and memory consumption info on a single line.
71
72 Revision 1.34  2007/04/04 10:33:36  jgrosseo
73 1) Storing of files to the Grid is now done _after_ your preprocessors succeeded. This is transparent, which means that you can still use the same functions (Store, StoreReferenceData) to store files to the Grid. However, the Shuttle first stores them locally and transfers them after the preprocessor finished. The return code of these two functions has changed from UInt_t to Bool_t which gives you the success of the storing.
74 In case of an error with the Grid, the Shuttle will retry the storing later, the preprocessor does not need to be run again.
75
76 2) The meaning of the return code of the preprocessor has changed. 0 is now success and any other value means failure. This value is stored in the log and you can use it to keep details about the error condition.
77
78 3) New function StoreReferenceFile to _directly_ store a file (without opening it) to the reference storage.
79
80 4) The memory usage of the preprocessor is monitored. If it exceeds 2 GB it is terminated.
81
82 5) New function AliPreprocessor::ProcessDCS(). If you do not need to have DCS data in all cases, you can skip the processing by implemting this function and returning kFALSE under certain conditions. E.g. if there is a certain run type.
83 If you always need DCS data (like before), you do not need to implement it.
84
85 6) The run type has been added to the monitoring page
86
87 Revision 1.33  2007/04/03 13:56:01  acolla
88 Grid Storage at the end of preprocessing. Added virtual method to disable DCS query according to the
89 run type.
90
91 Revision 1.32  2007/02/28 10:41:56  acolla
92 Run type field added in SHUTTLE framework. Run type is read from "run type" logbook and retrieved by
93 AliPreprocessor::GetRunType() function.
94 Added some ldap definition files.
95
96 Revision 1.30  2007/02/13 11:23:21  acolla
97 Moved getters and setters of Shuttle's main OCDB/Reference, local
98 OCDB/Reference, temp and log folders to AliShuttleInterface
99
100 Revision 1.27  2007/01/30 17:52:42  jgrosseo
101 adding monalisa monitoring
102
103 Revision 1.26  2007/01/23 19:20:03  acolla
104 Removed old ldif files, added TOF, MCH ldif files. Added some options in
105 AliShuttleConfig::Print. Added in Ali Shuttle: SetShuttleTempDir and
106 SetShuttleLogDir
107
108 Revision 1.25  2007/01/15 19:13:52  acolla
109 Moved some AliInfo to AliDebug in SendMail function
110
111 Revision 1.21  2006/12/07 08:51:26  jgrosseo
112 update (alberto):
113 table, db names in ldap configuration
114 added GRP preprocessor
115 DCS data can also be retrieved by data point
116
117 Revision 1.20  2006/11/16 16:16:48  jgrosseo
118 introducing strict run ordering flag
119 removed giving preprocessor name to preprocessor, they have to know their name themselves ;-)
120
121 Revision 1.19  2006/11/06 14:23:04  jgrosseo
122 major update (Alberto)
123 o) reading of run parameters from the logbook
124 o) online offline naming conversion
125 o) standalone DCSclient package
126
127 Revision 1.18  2006/10/20 15:22:59  jgrosseo
128 o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
129 o) Merging Collect, CollectAll, CollectNew function
130 o) Removing implementation of empty copy constructors (declaration still there!)
131
132 Revision 1.17  2006/10/05 16:20:55  jgrosseo
133 adapting to new CDB classes
134
135 Revision 1.16  2006/10/05 15:46:26  jgrosseo
136 applying to the new interface
137
138 Revision 1.15  2006/10/02 16:38:39  jgrosseo
139 update (alberto):
140 fixed memory leaks
141 storing of objects that failed to be stored to the grid before
142 interfacing of shuttle status table in daq system
143
144 Revision 1.14  2006/08/29 09:16:05  jgrosseo
145 small update
146
147 Revision 1.13  2006/08/15 10:50:00  jgrosseo
148 effc++ corrections (alberto)
149
150 Revision 1.12  2006/08/08 14:19:29  jgrosseo
151 Update to shuttle classes (Alberto)
152
153 - Possibility to set the full object's path in the Preprocessor's and
154 Shuttle's  Store functions
155 - Possibility to extend the object's run validity in the same classes
156 ("startValidity" and "validityInfinite" parameters)
157 - Implementation of the StoreReferenceData function to store reference
158 data in a dedicated CDB storage.
159
160 Revision 1.11  2006/07/21 07:37:20  jgrosseo
161 last run is stored after each run
162
163 Revision 1.10  2006/07/20 09:54:40  jgrosseo
164 introducing status management: The processing per subdetector is divided into several steps,
165 after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
166 can keep track of the number of failures and skips further processing after a certain threshold is
167 exceeded. These thresholds can be configured in LDAP.
168
169 Revision 1.9  2006/07/19 10:09:55  jgrosseo
170 new configuration, accesst to DAQ FES (Alberto)
171
172 Revision 1.8  2006/07/11 12:44:36  jgrosseo
173 adding parameters for extended validity range of data produced by preprocessor
174
175 Revision 1.7  2006/07/10 14:37:09  jgrosseo
176 small fix + todo comment
177
178 Revision 1.6  2006/07/10 13:01:41  jgrosseo
179 enhanced storing of last sucessfully processed run (alberto)
180
181 Revision 1.5  2006/07/04 14:59:57  jgrosseo
182 revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
183
184 Revision 1.4  2006/06/12 09:11:16  jgrosseo
185 coding conventions (Alberto)
186
187 Revision 1.3  2006/06/06 14:26:40  jgrosseo
188 o) removed files that were moved to STEER
189 o) shuttle updated to follow the new interface (Alberto)
190
191 Revision 1.2  2006/03/07 07:52:34  hristov
192 New version (B.Yordanov)
193
194 Revision 1.6  2005/11/19 17:19:14  byordano
195 RetrieveDATEEntries and RetrieveConditionsData added
196
197 Revision 1.5  2005/11/19 11:09:27  byordano
198 AliShuttle declaration added
199
200 Revision 1.4  2005/11/17 17:47:34  byordano
201 TList changed to TObjArray
202
203 Revision 1.3  2005/11/17 14:43:23  byordano
204 import to local CVS
205
206 Revision 1.1.1.1  2005/10/28 07:33:58  hristov
207 Initial import as subdirectory in AliRoot
208
209 Revision 1.2  2005/09/13 08:41:15  byordano
210 default startTime endTime added
211
212 Revision 1.4  2005/08/30 09:13:02  byordano
213 some docs added
214
215 Revision 1.3  2005/08/29 21:15:47  byordano
216 some docs added
217
218 */
219
//
// This class is the main manager for AliShuttle.
// It organizes the data retrieval from DCS and calls the
// interface methods of AliPreprocessor.
// For every detector in AliShuttleConfig (see AliShuttleConfig),
// data for its set of aliases is retrieved. If there is a registered
// AliPreprocessor for this detector then it will be used
// according to the schema (see AliPreprocessor).
// If there isn't a registered AliPreprocessor then the retrieved
// data is stored automatically to the underlying AliCDBStorage.
// The alias name is used for detSpec.
//
232
233 #include "AliShuttle.h"
234
235 #include "AliCDBManager.h"
236 #include "AliCDBStorage.h"
237 #include "AliCDBId.h"
238 #include "AliCDBRunRange.h"
239 #include "AliCDBPath.h"
240 #include "AliCDBEntry.h"
241 #include "AliShuttleConfig.h"
242 #include "DCSClient/AliDCSClient.h"
243 #include "AliLog.h"
244 #include "AliPreprocessor.h"
245 #include "AliShuttleStatus.h"
246 #include "AliShuttleLogbookEntry.h"
247
248 #include <TSystem.h>
249 #include <TObject.h>
250 #include <TString.h>
251 #include <TTimeStamp.h>
252 #include <TObjString.h>
253 #include <TSQLServer.h>
254 #include <TSQLResult.h>
255 #include <TSQLRow.h>
256 #include <TMutex.h>
257 #include <TSystemDirectory.h>
258 #include <TSystemFile.h>
259 #include <TFile.h>
260 #include <TFileMerger.h>
261 #include <TGrid.h>
262 #include <TGridResult.h>
263
264 #include <TMonaLisaWriter.h>
265
266 #include <fstream>
267
268 #include <sys/types.h>
269 #include <sys/wait.h>
270
271 ClassImp(AliShuttle)
272
273 //______________________________________________________________________________________________
274 AliShuttle::AliShuttle(const AliShuttleConfig* config,
275                 UInt_t timeout, Int_t retries):
276 fConfig(config),
277 fTimeout(timeout), fRetries(retries),
278 fPreprocessorMap(),
279 fLogbookEntry(0),
280 fCurrentDetector(),
281 fStatusEntry(0),
282 fMonitoringMutex(0),
283 fLastActionTime(0),
284 fLastAction(),
285 fMonaLisa(0),
286 fTestMode(kNone),
287 fReadTestMode(kFALSE),
288 fOutputRedirected(kFALSE)
289 {
290         //
291         // config: AliShuttleConfig used
292         // timeout: timeout used for AliDCSClient connection
293         // retries: the number of retries in case of connection error.
294         //
295
296         if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
297         for(int iSys=0;iSys<4;iSys++) {
298                 fServer[iSys]=0;
299                 if (iSys < 3)
300                         fFXSlist[iSys].SetOwner(kTRUE);
301         }
302         fPreprocessorMap.SetOwner(kTRUE);
303
304         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
305                 fFirstUnprocessed[iDet] = kFALSE;
306
307         fMonitoringMutex = new TMutex();
308 }
309
310 //______________________________________________________________________________________________
311 AliShuttle::~AliShuttle()
312 {
313         //
314         // destructor
315         //
316
317         fPreprocessorMap.DeleteAll();
318         for(int iSys=0;iSys<4;iSys++)
319                 if(fServer[iSys]) {
320                         fServer[iSys]->Close();
321                         delete fServer[iSys];
322                         fServer[iSys] = 0;
323                 }
324
325         if (fStatusEntry){
326                 delete fStatusEntry;
327                 fStatusEntry = 0;
328         }
329         
330         if (fMonitoringMutex) 
331         {
332                 delete fMonitoringMutex;
333                 fMonitoringMutex = 0;
334         }
335 }
336
337 //______________________________________________________________________________________________
338 void AliShuttle::RegisterPreprocessor(AliPreprocessor* preprocessor)
339 {
340         //
341         // Registers new AliPreprocessor.
342         // It uses GetName() for indentificator of the pre processor.
343         // The pre processor is registered it there isn't any other
344         // with the same identificator (GetName()).
345         //
346
347         const char* detName = preprocessor->GetName();
348         if(GetDetPos(detName) < 0)
349                 AliFatal(Form("********** !!!!! Invalid detector name: %s !!!!! **********", detName));
350
351         if (fPreprocessorMap.GetValue(detName)) {
352                 AliWarning(Form("AliPreprocessor %s is already registered!", detName));
353                 return;
354         }
355
356         fPreprocessorMap.Add(new TObjString(detName), preprocessor);
357 }
358 //______________________________________________________________________________________________
359 Bool_t AliShuttle::Store(const AliCDBPath& path, TObject* object,
360                 AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite)
361 {
362         // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for
363         // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored
364         // using this function. Use StoreReferenceData instead!
365         // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
366         // finishes the data are transferred to the main storage (Grid).
367
368         return StoreLocally(fgkLocalCDB, path, object, metaData, validityStart, validityInfinite);
369 }
370
371 //______________________________________________________________________________________________
372 Bool_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData)
373 {
374         // Stores a CDB object in the storage for reference data. This objects will not be available during
375         // offline reconstrunction. Use this function for reference data only!
376         // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
377         // finishes the data are transferred to the main storage (Grid).
378
379         return StoreLocally(fgkLocalRefStorage, path, object, metaData);
380 }
381
382 //______________________________________________________________________________________________
383 Bool_t AliShuttle::StoreLocally(const TString& localUri,
384                         const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
385                         Int_t validityStart, Bool_t validityInfinite)
386 {
387         // Store object temporarily in local storage. Parameters are passed by Store and StoreReferenceData functions.
388         // when the preprocessor finishes the data are transferred to the main storage (Grid).
389         // The parameters are:
390         //   1) Uri of the backup storage (Local)
391         //   2) the object's path.
392         //   3) the object to be stored
393         //   4) the metaData to be associated with the object
394         //   5) the validity start run number w.r.t. the current run,
395         //      if the data is valid only for this run leave the default 0
396         //   6) specifies if the calibration data is valid for infinity (this means until updated),
397         //      typical for calibration runs, the default is kFALSE
398         //
399         // returns 0 if fail, 1 otherwise
400
401         if (fTestMode & kErrorStorage)
402         {
403                 Log(fCurrentDetector, "StoreLocally - In TESTMODE - Simulating error while storing locally");
404                 return kFALSE;
405         }
406         
407         const char* cdbType = (localUri == fgkLocalCDB) ? "CDB" : "Reference";
408
409         Int_t firstRun = GetCurrentRun() - validityStart;
410         if(firstRun < 0) {
411                 AliWarning("First valid run happens to be less than 0! Setting it to 0.");
412                 firstRun=0;
413         }
414
415         Int_t lastRun = -1;
416         if(validityInfinite) {
417                 lastRun = AliCDBRunRange::Infinity();
418         } else {
419                 lastRun = GetCurrentRun();
420         }
421
422         // Version is set to current run, it will be used later to transfer data to Grid
423         AliCDBId id(path, firstRun, lastRun, GetCurrentRun(), -1);
424
425         if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){
426                 TObjString runUsed = Form("%d", GetCurrentRun());
427                 metaData->SetProperty("RunUsed(TObjString)", runUsed.Clone());
428         }
429
430         Bool_t result = kFALSE;
431
432         if (!(AliCDBManager::Instance()->GetStorage(localUri))) {
433                 Log("SHUTTLE", Form("StoreLocally - Cannot activate local %s storage", cdbType));
434         } else {
435                 result = AliCDBManager::Instance()->GetStorage(localUri)
436                                         ->Put(object, id, metaData);
437         }
438
439         if(!result) {
440
441                 Log(fCurrentDetector, Form("StoreLocally - Can't store object <%s>!", id.ToString().Data()));
442         }
443
444         return result;
445 }
446
447 //______________________________________________________________________________________________
448 Bool_t AliShuttle::StoreOCDB()
449 {
450         //
451         // Called when preprocessor ends successfully or when previous storage attempt failed (kStoreError status)
452         // Calls underlying StoreOCDB(const char*) function twice, for OCDB and Reference storage.
453         // Then calls StoreRefFilesToGrid to store reference files. 
454         //
455         
456         if (fTestMode & kErrorGrid)
457         {
458                 Log("SHUTTLE", "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
459                 Log(fCurrentDetector, "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
460                 return kFALSE;
461         }
462         
463         Log("SHUTTLE","Storing OCDB data ...");
464         Bool_t resultCDB = StoreOCDB(fgkMainCDB);
465
466         Log("SHUTTLE","Storing reference data ...");
467         Bool_t resultRef = StoreOCDB(fgkMainRefStorage);
468         
469         Log("SHUTTLE","Storing reference files ...");
470         Bool_t resultRefFiles = StoreRefFilesToGrid();
471         
472         return resultCDB && resultRef && resultRefFiles;
473 }
474
475 //______________________________________________________________________________________________
476 Bool_t AliShuttle::StoreOCDB(const TString& gridURI)
477 {
478         //
479         // Called by StoreOCDB(), performs actual storage to the main OCDB and reference storages (Grid)
480         //
481
482         TObjArray* gridIds=0;
483
484         Bool_t result = kTRUE;
485
486         const char* type = 0;
487         TString localURI;
488         if(gridURI == fgkMainCDB) {
489                 type = "OCDB";
490                 localURI = fgkLocalCDB;
491         } else if(gridURI == fgkMainRefStorage) {
492                 type = "reference";
493                 localURI = fgkLocalRefStorage;
494         } else {
495                 AliError(Form("Invalid storage URI: %s", gridURI.Data()));
496                 return kFALSE;
497         }
498
499         AliCDBManager* man = AliCDBManager::Instance();
500
501         AliCDBStorage *gridSto = man->GetStorage(gridURI);
502         if(!gridSto) {
503                 Log("SHUTTLE",
504                         Form("StoreOCDB - cannot activate main %s storage", type));
505                 return kFALSE;
506         }
507
508         gridIds = gridSto->GetQueryCDBList();
509
510         // get objects previously stored in local CDB
511         AliCDBStorage *localSto = man->GetStorage(localURI);
512         if(!localSto) {
513                 Log("SHUTTLE",
514                         Form("StoreOCDB - cannot activate local %s storage", type));
515                 return kFALSE;
516         }
517         AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*");
518         // Local objects were stored with current run as Grid version!
519         TList* localEntries = localSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun());
520         localEntries->SetOwner(1);
521
522         // loop on local stored objects
523         TIter localIter(localEntries);
524         AliCDBEntry *aLocEntry = 0;
525         while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){
526                 aLocEntry->SetOwner(1);
527                 AliCDBId aLocId = aLocEntry->GetId();
528                 aLocEntry->SetVersion(-1);
529                 aLocEntry->SetSubVersion(-1);
530
531                 // If local object is valid up to infinity we store it only if it is
532                 // the first unprocessed run!
533                 if (aLocId.GetLastRun() == AliCDBRunRange::Infinity() &&
534                         !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
535                 {
536                         Log("SHUTTLE", Form("StoreOCDB - %s: object %s has validity infinite but "
537                                                 "there are previous unprocessed runs!",
538                                                 fCurrentDetector.Data(), aLocId.GetPath().Data()));
539                         continue;
540                 }
541
542                 // loop on Grid valid Id's
543                 Bool_t store = kTRUE;
544                 TIter gridIter(gridIds);
545                 AliCDBId* aGridId = 0;
546                 while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
547                         if(aGridId->GetPath() != aLocId.GetPath()) continue;
548                         // skip all objects valid up to infinity
549                         if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
550                         // if we get here, it means there's already some more recent object stored on Grid!
551                         store = kFALSE;
552                         break;
553                 }
554
555                 // If we get here, the file can be stored!
556                 Bool_t storeOk = gridSto->Put(aLocEntry);
557                 if(!store || storeOk){
558
559                         if (!store)
560                         {
561                                 Log(fCurrentDetector.Data(),
562                                         Form("StoreOCDB - A more recent object already exists in %s storage: <%s>",
563                                                 type, aGridId->ToString().Data()));
564                         } else {
565                                 Log("SHUTTLE",
566                                         Form("StoreOCDB - Object <%s> successfully put into %s storage",
567                                                 aLocId.ToString().Data(), type));
568                                 Log(fCurrentDetector.Data(),
569                                         Form("StoreOCDB - Object <%s> successfully put into %s storage",
570                                                 aLocId.ToString().Data(), type));
571                         }
572
573                         // removing local filename...
574                         TString filename;
575                         localSto->IdToFilename(aLocId, filename);
576                         AliInfo(Form("Removing local file %s", filename.Data()));
577                         RemoveFile(filename.Data());
578                         continue;
579                 } else  {
580                         Log("SHUTTLE",
581                                 Form("StoreOCDB - Grid %s storage of object <%s> failed",
582                                         type, aLocId.ToString().Data()));
583                         Log(fCurrentDetector.Data(),
584                                 Form("StoreOCDB - Grid %s storage of object <%s> failed",
585                                         type, aLocId.ToString().Data()));
586                         result = kFALSE;
587                 }
588         }
589         localEntries->Clear();
590
591         return result;
592 }
593
594 //______________________________________________________________________________________________
595 Bool_t AliShuttle::CleanReferenceStorage(const char* detector)
596 {
597         // clears the directory used to store reference files of a given subdetector
598   
599         AliCDBManager* man = AliCDBManager::Instance();
600         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
601         TString localBaseFolder = sto->GetBaseFolder();
602
603         TString targetDir = GetRefFilePrefix(localBaseFolder.Data(), detector);
604         
605         Log("SHUTTLE", Form("Cleaning %s", targetDir.Data()));
606
607         TString begin;
608         begin.Form("%d_", GetCurrentRun());
609         
610         TSystemDirectory* baseDir = new TSystemDirectory("/", targetDir);
611         if (!baseDir)
612                 return kTRUE;
613                 
614         TList* dirList = baseDir->GetListOfFiles();
615         delete baseDir;
616         
617         if (!dirList) return kTRUE;
618                         
619         if (dirList->GetEntries() < 3) 
620         {
621                 delete dirList;
622                 return kTRUE;
623         }
624                                 
625         Int_t nDirs = 0, nDel = 0;
626         TIter dirIter(dirList);
627         TSystemFile* entry = 0;
628
629         Bool_t success = kTRUE;
630         
631         while ((entry = dynamic_cast<TSystemFile*> (dirIter.Next())))
632         {                                       
633                 if (entry->IsDirectory())
634                         continue;
635                 
636                 TString fileName(entry->GetName());
637                 if (!fileName.BeginsWith(begin))
638                         continue;
639                         
640                 nDirs++;
641                                                 
642                 // delete file
643                 Int_t result = gSystem->Unlink(fileName.Data());
644                 
645                 if (result)
646                 {
647                         Log("SHUTTLE", Form("Could not delete file %s!", fileName.Data()));
648                         success = kFALSE;
649                 } else {
650                         nDel++;
651                 }
652         }
653
654         if(nDirs > 0)
655                 Log("SHUTTLE", Form("CleanReferenceStorage - %d (over %d) reference files in folder %s were deleted.", 
656                         nDel, nDirs, targetDir.Data()));
657
658                 
659         delete dirList;
660         return success;
661
662
663
664
665
666
667   Int_t result = gSystem->GetPathInfo(targetDir, 0, (Long64_t*) 0, 0, 0);
668   if (result == 0)
669   {
670     // delete directory
671     result = gSystem->Exec(Form("rm -r %s", targetDir.Data()));
672     if (result != 0)
673     {  
674       Log("SHUTTLE", Form("StoreReferenceFile - Could not clear directory %s", targetDir.Data()));
675       return kFALSE;
676     }
677   }
678
679   result = gSystem->mkdir(targetDir, kTRUE);
680   if (result != 0)
681   {
682     Log("SHUTTLE", Form("StoreReferenceFile - Error creating base directory %s", targetDir.Data()));
683     return kFALSE;
684   }
685         
686   return kTRUE;
687 }
688
689 //______________________________________________________________________________________________
690 Bool_t AliShuttle::StoreReferenceFile(const char* detector, const char* localFile, const char* gridFileName)
691 {
692         //
693         // Stores reference file directly (without opening it). This function stores the file locally.
694         //
695         // The file is stored under the following location: 
696         // <base folder of local reference storage>/<DET>/<RUN#>_<gridFileName>
697         // where <gridFileName> is the second parameter given to the function
698         // 
699         
700         if (fTestMode & kErrorStorage)
701         {
702                 Log(fCurrentDetector, "StoreReferenceFile - In TESTMODE - Simulating error while storing locally");
703                 return kFALSE;
704         }
705         
706         AliCDBManager* man = AliCDBManager::Instance();
707         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
708         
709         TString localBaseFolder = sto->GetBaseFolder();
710         
711         TString targetDir = GetRefFilePrefix(localBaseFolder.Data(), detector); 
712         
713         //try to open folder, if does not exist
714         void* dir = gSystem->OpenDirectory(targetDir.Data());
715         if (dir == NULL) {
716                 if (gSystem->mkdir(targetDir.Data(), kTRUE)) {
717                         Log("SHUTTLE", Form("Can't open directory <%s>", targetDir.Data()));
718                         return kFALSE;
719                 }
720
721         } else {
722                 gSystem->FreeDirectory(dir);
723         }
724
725         TString target;
726         target.Form("%s/%d_%s", targetDir.Data(), GetCurrentRun(), gridFileName);
727         
728         Int_t result = gSystem->GetPathInfo(localFile, 0, (Long64_t*) 0, 0, 0);
729         if (result)
730         {
731                 Log("SHUTTLE", Form("StoreReferenceFile - %s does not exist", localFile));
732                 return kFALSE;
733         }
734
735         result = gSystem->CopyFile(localFile, target);
736
737         if (result == 0)
738         {
739                 Log("SHUTTLE", Form("StoreReferenceFile - File %s stored locally to %s", localFile, target.Data()));
740                 return kTRUE;
741         }
742         else
743         {
744                 Log("SHUTTLE", Form("StoreReferenceFile - Could not store file %s to %s!. Error code = %d", 
745                                 localFile, target.Data(), result));
746                 return kFALSE;
747         }       
748 }
749
750 //______________________________________________________________________________________________
751 Bool_t AliShuttle::StoreRefFilesToGrid()
752 {
753         //
754         // Transfers the reference file to the Grid.
755         //
756         // The files are stored under the following location: 
757         // <base folder of reference storage>/<DET>/<RUN#>_<gridFileName>
758         //
759         
760         AliCDBManager* man = AliCDBManager::Instance();
761         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
762         if (!sto)
763                 return kFALSE;
764         TString localBaseFolder = sto->GetBaseFolder();
765                 
766         TString dir = GetRefFilePrefix(localBaseFolder.Data(), fCurrentDetector.Data());
767                 
768         AliCDBStorage* gridSto = man->GetStorage(fgkMainRefStorage);
769         if (!gridSto)
770                 return kFALSE;
771         
772         TString gridBaseFolder = gridSto->GetBaseFolder();
773
774         TString alienDir = GetRefFilePrefix(gridBaseFolder.Data(), fCurrentDetector.Data());
775         
776         TString begin;
777         begin.Form("%d_", GetCurrentRun());
778         
779         TSystemDirectory* baseDir = new TSystemDirectory("/", dir);
780         if (!baseDir)
781                 return kTRUE;
782                 
783         TList* dirList = baseDir->GetListOfFiles();
784         delete baseDir;
785         
786         if (!dirList) return kTRUE;
787                 
788         if (dirList->GetEntries() < 3) 
789         {
790                 delete dirList;
791                 return kTRUE;
792         }
793                         
794         if (!gGrid)
795         { 
796                 Log("SHUTTLE", "Connection to Grid failed: Cannot continue!");
797                 delete dirList;
798                 return kFALSE;
799         }
800         
801         Int_t nDirs = 0, nTransfer = 0;
802         TIter dirIter(dirList);
803         TSystemFile* entry = 0;
804
805         Bool_t success = kTRUE;
806         Bool_t first = kTRUE;
807         
808         while ((entry = dynamic_cast<TSystemFile*> (dirIter.Next())))
809         {                       
810                 if (entry->IsDirectory())
811                         continue;
812                         
813                 TString fileName(entry->GetName());
814                 if (!fileName.BeginsWith(begin))
815                         continue;
816                         
817                 nDirs++;
818                         
819                 if (first)
820                 {
821                         first = kFALSE;
822                         // check that DET folder exists, otherwise create it
823                         TGridResult* result = gGrid->Ls(alienDir.Data(), "a");
824                         
825                         if (!result)
826                         {
827                                 delete dirList;
828                                 return kFALSE;
829                         }
830                         
831                         if (!result->GetFileName(1)) // TODO: It looks like element 0 is always 0!!
832                         {
833                                 if (!gGrid->Mkdir(alienDir.Data(),"",0))
834                                 {
835                                         Log("SHUTTLE", Form("StoreRefFilesToGrid - Cannot create directory %s",
836                                                         alienDir.Data()));
837                                         delete dirList;
838                                         return kFALSE;
839                                 } else {
840                                         Log("SHUTTLE",Form("Folder %s created", alienDir.Data()));
841                                 }
842                                 
843                         } else {
844                                         Log("SHUTTLE",Form("Folder %s found", alienDir.Data()));
845                         }
846                 }
847                         
848                 TString fullLocalPath;
849                 fullLocalPath.Form("%s/%s", dir.Data(), fileName.Data());
850                 
851                 TString fullGridPath;
852                 fullGridPath.Form("alien://%s/%s", alienDir.Data(), fileName.Data());
853
854                 TFileMerger fileMerger;
855                 Bool_t result = TFile::Cp(fullLocalPath, fullGridPath);
856                 
857                 if (result)
858                 {
859                         Log("SHUTTLE", Form("StoreRefFilesToGrid - Copying local file %s to %s succeeded!", fullLocalPath.Data(), fullGridPath.Data()));
860                         RemoveFile(fullLocalPath);
861                         nTransfer++;
862                 }
863                 else
864                 {
865                         Log("SHUTTLE", Form("StoreRefFilesToGrid - Copying local file %s to %s FAILED!", fullLocalPath.Data(), fullGridPath.Data()));
866                         success = kFALSE;
867                 }
868         }
869
870         Log("SHUTTLE", Form("StoreRefFilesToGrid - %d (over %d) reference files in folder %s copied to Grid.", nTransfer, nDirs, dir.Data()));
871
872                 
873         delete dirList;
874         return success;
875 }
876
877 //______________________________________________________________________________________________
878 const char* AliShuttle::GetRefFilePrefix(const char* base, const char* detector)
879 {
880         //
881         // Get folder name of reference files 
882         //
883
884         TString offDetStr(GetOfflineDetName(detector));
885         TString dir;
886         if (offDetStr == "ITS" || offDetStr == "MUON" || offDetStr == "PHOS")
887         {
888                 dir.Form("%s/%s/%s", base, offDetStr.Data(), detector);
889         } else {
890                 dir.Form("%s/%s", base, offDetStr.Data());
891         }
892         
893         return dir.Data();
894         
895
896 }
897 //______________________________________________________________________________________________
898 void AliShuttle::CleanLocalStorage(const TString& uri)
899 {
900         //
901         // Called in case the preprocessor is declared failed. Remove remaining objects from the local storages.
902         //
903
904         const char* type = 0;
905         if(uri == fgkLocalCDB) {
906                 type = "OCDB";
907         } else if(uri == fgkLocalRefStorage) {
908                 type = "Reference";
909         } else {
910                 AliError(Form("Invalid storage URI: %s", uri.Data()));
911                 return;
912         }
913
914         AliCDBManager* man = AliCDBManager::Instance();
915
916         // open local storage
917         AliCDBStorage *localSto = man->GetStorage(uri);
918         if(!localSto) {
919                 Log("SHUTTLE",
920                         Form("CleanLocalStorage - cannot activate local %s storage", type));
921                 return;
922         }
923
924         TString filename(Form("%s/%s/*/Run*_v%d_s*.root",
925                 localSto->GetBaseFolder().Data(), GetOfflineDetName(fCurrentDetector.Data()), GetCurrentRun()));
926
927         AliInfo(Form("filename = %s", filename.Data()));
928
929         AliInfo(Form("Removing remaining local files from run %d and detector %s ...",
930                 GetCurrentRun(), fCurrentDetector.Data()));
931
932         RemoveFile(filename.Data());
933
934 }
935
936 //______________________________________________________________________________________________
937 void AliShuttle::RemoveFile(const char* filename)
938 {
939         //
940         // removes local file
941         //
942
943         TString command(Form("rm -f %s", filename));
944
945         Int_t result = gSystem->Exec(command.Data());
946         if(result != 0)
947         {
948                 Log("SHUTTLE", Form("RemoveFile - %s: Cannot remove file %s!",
949                         fCurrentDetector.Data(), filename));
950         }
951 }
952
953 //______________________________________________________________________________________________
954 AliShuttleStatus* AliShuttle::ReadShuttleStatus()
955 {
956         //
957         // Reads the AliShuttleStatus from the CDB
958         //
959
960         if (fStatusEntry){
961                 delete fStatusEntry;
962                 fStatusEntry = 0;
963         }
964
965         fStatusEntry = AliCDBManager::Instance()->GetStorage(GetLocalCDB())
966                 ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
967
968         if (!fStatusEntry) return 0;
969         fStatusEntry->SetOwner(1);
970
971         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
972         if (!status) {
973                 AliError("Invalid object stored to CDB!");
974                 return 0;
975         }
976
977         return status;
978 }
979
980 //______________________________________________________________________________________________
981 Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status)
982 {
983         //
984         // writes the status for one subdetector
985         //
986
987         if (fStatusEntry){
988                 delete fStatusEntry;
989                 fStatusEntry = 0;
990         }
991
992         Int_t run = GetCurrentRun();
993
994         AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
995
996         fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
997         fStatusEntry->SetOwner(1);
998
999         UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
1000
1001         if (!result) {
1002                 Log("SHUTTLE", Form("WriteShuttleStatus - Failed for %s, run %d",
1003                                                 fCurrentDetector.Data(), run));
1004                 return kFALSE;
1005         }
1006         
1007         SendMLInfo();
1008
1009         return kTRUE;
1010 }
1011
1012 //______________________________________________________________________________________________
1013 void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount)
1014 {
1015         //
1016         // changes the AliShuttleStatus for the given detector and run to the given status
1017         //
1018
1019         if (!fStatusEntry){
1020                 AliError("UNEXPECTED: fStatusEntry empty");
1021                 return;
1022         }
1023
1024         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1025
1026         if (!status){
1027                 Log("SHUTTLE", "UNEXPECTED: status could not be read from current CDB entry");
1028                 return;
1029         }
1030
1031         TString actionStr = Form("UpdateShuttleStatus - %s: Changing state from %s to %s",
1032                                 fCurrentDetector.Data(),
1033                                 status->GetStatusName(),
1034                                 status->GetStatusName(newStatus));
1035         Log("SHUTTLE", actionStr);
1036         SetLastAction(actionStr);
1037
1038         status->SetStatus(newStatus);
1039         if (increaseCount) status->IncreaseCount();
1040
1041         AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
1042
1043         SendMLInfo();
1044 }
1045
1046 //______________________________________________________________________________________________
1047 void AliShuttle::SendMLInfo()
1048 {
1049         //
1050         // sends ML information about the current status of the current detector being processed
1051         //
1052         
1053         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1054         
1055         if (!status){
1056                 Log("SHUTTLE", "SendMLInfo - UNEXPECTED: status could not be read from current CDB entry");
1057                 return;
1058         }
1059         
1060         TMonaLisaText  mlStatus(Form("%s_status", fCurrentDetector.Data()), status->GetStatusName());
1061         TMonaLisaValue mlRetryCount(Form("%s_count", fCurrentDetector.Data()), status->GetCount());
1062
1063         TList mlList;
1064         mlList.Add(&mlStatus);
1065         mlList.Add(&mlRetryCount);
1066
1067         fMonaLisa->SendParameters(&mlList);
1068 }
1069
1070 //______________________________________________________________________________________________
1071 Bool_t AliShuttle::ContinueProcessing()
1072 {
1073         // this function reads the AliShuttleStatus information from CDB and
1074         // checks if the processing should be continued
1075         // if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
1076
1077         if (!fConfig->HostProcessDetector(fCurrentDetector)) return kFALSE;
1078
1079         AliPreprocessor* aPreprocessor =
1080                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
1081         if (!aPreprocessor)
1082         {
1083                 AliInfo(Form("%s: no preprocessor registered", fCurrentDetector.Data()));
1084                 return kFALSE;
1085         }
1086
1087         AliShuttleLogbookEntry::Status entryStatus =
1088                 fLogbookEntry->GetDetectorStatus(fCurrentDetector);
1089
1090         if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
1091                 AliInfo(Form("ContinueProcessing - %s is %s",
1092                                 fCurrentDetector.Data(),
1093                                 fLogbookEntry->GetDetectorStatusName(entryStatus)));
1094                 return kFALSE;
1095         }
1096
1097         // if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state
1098
1099         // check if current run is first unprocessed run for current detector
1100         if (fConfig->StrictRunOrder(fCurrentDetector) &&
1101                 !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
1102         {
1103                 if (fTestMode == kNone)
1104                 {
1105                         Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering but this is not the first unprocessed run!"));
1106                         return kFALSE;
1107                 }
1108                 else
1109                 {
1110                         Log("SHUTTLE", Form("ContinueProcessing - In TESTMODE - Although %s requires strict run ordering and this is not the first unprocessed run, the SHUTTLE continues"));
1111                 }
1112         }
1113
1114         AliShuttleStatus* status = ReadShuttleStatus();
1115         if (!status) {
1116                 // first time
1117                 Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
1118                                 fCurrentDetector.Data()));
1119                 status = new AliShuttleStatus(AliShuttleStatus::kStarted);
1120                 return WriteShuttleStatus(status);
1121         }
1122
1123         // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
1124         // If it happens it may mean Logbook updating failed... let's do it now!
1125         if (status->GetStatus() == AliShuttleStatus::kDone ||
1126             status->GetStatus() == AliShuttleStatus::kFailed){
1127                 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
1128                                         fCurrentDetector.Data(),
1129                                         status->GetStatusName(status->GetStatus())));
1130                 UpdateShuttleLogbook(fCurrentDetector.Data(),
1131                                         status->GetStatusName(status->GetStatus()));
1132                 return kFALSE;
1133         }
1134
1135         if (status->GetStatus() == AliShuttleStatus::kStoreError) {
1136                 Log("SHUTTLE",
1137                         Form("ContinueProcessing - %s: Grid storage of one or more objects failed. Trying again now",
1138                                 fCurrentDetector.Data()));
1139                 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
1140                 if (StoreOCDB()){
1141                         Log("SHUTTLE", Form("ContinueProcessing - %s: all objects successfully stored into main storage",
1142                                 fCurrentDetector.Data()));
1143                         UpdateShuttleStatus(AliShuttleStatus::kDone);
1144                         UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE");
1145                 } else {
1146                         Log("SHUTTLE",
1147                                 Form("ContinueProcessing - %s: Grid storage failed again",
1148                                         fCurrentDetector.Data()));
1149                         UpdateShuttleStatus(AliShuttleStatus::kStoreError);
1150                 }
1151                 return kFALSE;
1152         }
1153
1154         // if we get here, there is a restart
1155         Bool_t cont = kFALSE;
1156
1157         // abort conditions
1158         if (status->GetCount() >= fConfig->GetMaxRetries()) {
1159                 Log("SHUTTLE", Form("ContinueProcessing - %s failed %d times in status %s - "
1160                                 "Updating Shuttle Logbook", fCurrentDetector.Data(),
1161                                 status->GetCount(), status->GetStatusName()));
1162                 UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
1163                 UpdateShuttleStatus(AliShuttleStatus::kFailed);
1164
1165                 // there may still be objects in local OCDB and reference storage
1166                 // and FXS databases may be not updated: do it now!
1167                 
1168                 // TODO Currently disabled, we want to keep files in case of failure!
1169                 // CleanLocalStorage(fgkLocalCDB);
1170                 // CleanLocalStorage(fgkLocalRefStorage);
1171                 // UpdateTableFailCase();
1172                 
1173                 // Send mail to detector expert!
1174                 AliInfo(Form("Sending mail to %s expert...", fCurrentDetector.Data()));
1175                 if (!SendMail())
1176                         Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert",
1177                                         fCurrentDetector.Data()));
1178
1179         } else {
1180                 Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. "
1181                                 "Aborted before with %s. Retry number %d.", fCurrentDetector.Data(),
1182                                 status->GetStatusName(), status->GetCount()));
1183                 Bool_t increaseCount = kTRUE;
1184                 if (status->GetStatus() == AliShuttleStatus::kDCSError || status->GetStatus() == AliShuttleStatus::kDCSStarted)
1185                         increaseCount = kFALSE;
1186                 UpdateShuttleStatus(AliShuttleStatus::kStarted, increaseCount);
1187                 cont = kTRUE;
1188         }
1189
1190         return cont;
1191 }
1192
1193 //______________________________________________________________________________________________
1194 Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
1195 {
1196         //
1197         // Makes data retrieval for all detectors in the configuration.
1198         // entry: Shuttle logbook entry, contains run paramenters and status of detectors
1199         // (Unprocessed, Inactive, Failed or Done).
1200         // Returns kFALSE in case of error occured and kTRUE otherwise
1201         //
1202
1203         if (!entry) return kFALSE;
1204
1205         fLogbookEntry = entry;
1206
1207         AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^* \n",
1208                                         GetCurrentRun()));
1209
1210         // create ML instance that monitors this run
1211         fMonaLisa = new TMonaLisaWriter(Form("%d", GetCurrentRun()), "SHUTTLE", "aliendb1.cern.ch");
1212         // disable monitoring of other parameters that come e.g. from TFile
1213         gMonitoringWriter = 0;
1214
1215         // Send the information to ML
1216         TMonaLisaText  mlStatus("SHUTTLE_status", "Processing");
1217         TMonaLisaText  mlRunType("SHUTTLE_runtype", Form("%s (%s)", entry->GetRunType(), entry->GetRunParameter("log")));
1218
1219         TList mlList;
1220         mlList.Add(&mlStatus);
1221         mlList.Add(&mlRunType);
1222
1223         fMonaLisa->SendParameters(&mlList);
1224
1225         if (fLogbookEntry->IsDone())
1226         {
1227                 Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook");
1228                 UpdateShuttleLogbook("shuttle_done");
1229                 fLogbookEntry = 0;
1230                 return kTRUE;
1231         }
1232
1233         // read test mode if flag is set
1234         if (fReadTestMode)
1235         {
1236                 fTestMode = kNone;
1237                 TString logEntry(entry->GetRunParameter("log"));
1238                 //printf("log entry = %s\n", logEntry.Data());
1239                 TString searchStr("Testmode: ");
1240                 Int_t pos = logEntry.Index(searchStr.Data());
1241                 //printf("%d\n", pos);
1242                 if (pos >= 0)
1243                 {
1244                         TSubString subStr = logEntry(pos + searchStr.Length(), logEntry.Length());
1245                         //printf("%s\n", subStr.String().Data());
1246                         TString newStr(subStr.Data());
1247                         TObjArray* token = newStr.Tokenize(' ');
1248                         if (token)
1249                         {
1250                                 //token->Print();
1251                                 TObjString* tmpStr = dynamic_cast<TObjString*> (token->First());
1252                                 if (tmpStr)
1253                                 {
1254                                         Int_t testMode = tmpStr->String().Atoi();
1255                                         if (testMode > 0)
1256                                         {
1257                                                 Log("SHUTTLE", Form("Enabling test mode %d", testMode));
1258                                                 SetTestMode((TestMode) testMode);
1259                                         }
1260                                 }
1261                                 delete token;          
1262                         }
1263                 }
1264         }
1265         
1266         Log("SHUTTLE", Form("The test mode flag is %d", (Int_t) fTestMode));
1267         
1268         fLogbookEntry->Print("all");
1269
1270         // Initialization
1271         Bool_t hasError = kFALSE;
1272
1273         AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
1274         if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
1275         AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
1276         if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
1277
1278         // Loop on detectors in the configuration
1279         TIter iter(fConfig->GetDetectors());
1280         TObjString* aDetector = 0;
1281
1282         while ((aDetector = (TObjString*) iter.Next()))
1283         {
1284                 fCurrentDetector = aDetector->String();
1285
1286                 if (ContinueProcessing() == kFALSE) continue;
1287
1288                 AliInfo(Form("\n\n \t\t\t****** run %d - %s: START  ******",
1289                                                 GetCurrentRun(), aDetector->GetName()));
1290
1291                 for(Int_t iSys=0;iSys<3;iSys++) fFXSCalled[iSys]=kFALSE;
1292
1293                 Log(fCurrentDetector.Data(), "Starting processing");
1294
1295                 Int_t pid = fork();
1296
1297                 if (pid < 0)
1298                 {
1299                         Log("SHUTTLE", "ERROR: Forking failed");
1300                 }
1301                 else if (pid > 0)
1302                 {
1303                         // parent
1304                         AliInfo(Form("In parent process of %d - %s: Starting monitoring",
1305                                                         GetCurrentRun(), aDetector->GetName()));
1306
1307                         Long_t begin = time(0);
1308
1309                         int status; // to be used with waitpid, on purpose an int (not Int_t)!
1310                         while (waitpid(pid, &status, WNOHANG) == 0)
1311                         {
1312                                 Long_t expiredTime = time(0) - begin;
1313
1314                                 if (expiredTime > fConfig->GetPPTimeOut())
1315                                 {
1316                                         TString tmp;
1317                                         tmp.Form("Process of %s time out. Run time: %d seconds. Killing...",
1318                                                                 fCurrentDetector.Data(), expiredTime);
1319                                         Log("SHUTTLE", tmp);
1320                                         Log(fCurrentDetector, tmp);
1321
1322                                         kill(pid, 9);
1323
1324                                         UpdateShuttleStatus(AliShuttleStatus::kPPTimeOut);
1325                                         hasError = kTRUE;
1326
1327                                         gSystem->Sleep(1000);
1328                                 }
1329                                 else
1330                                 {
1331                                         gSystem->Sleep(1000);
1332                                         
1333                                         TString checkStr;
1334                                         checkStr.Form("ps -o vsize --pid %d | tail -n 1", pid);
1335                                         FILE* pipe = gSystem->OpenPipe(checkStr, "r");
1336                                         if (!pipe)
1337                                         {
1338                                                 Log("SHUTTLE", Form("Error: Could not open pipe to %s", checkStr.Data()));
1339                                                 continue;
1340                                         }
1341                                                 
1342                                         char buffer[100];
1343                                         if (!fgets(buffer, 100, pipe))
1344                                         {
1345                                                 Log("SHUTTLE", "Error: ps did not return anything");
1346                                                 gSystem->ClosePipe(pipe);
1347                                                 continue;
1348                                         }
1349                                         gSystem->ClosePipe(pipe);
1350                                         
1351                                         //Log("SHUTTLE", Form("ps returned %s", buffer));
1352                                         
1353                                         Int_t mem = 0;
1354                                         if ((sscanf(buffer, "%d\n", &mem) != 1) || !mem)
1355                                         {
1356                                                 Log("SHUTTLE", "Error: Could not parse output of ps");
1357                                                 continue;
1358                                         }
1359                                         
1360                                         if (expiredTime % 60 == 0)
1361                                                 Log("SHUTTLE", Form("%s: Checking process. Run time: %d seconds - Memory consumption: %d KB",
1362                                                                 fCurrentDetector.Data(), expiredTime, mem));
1363                                         
1364                                         if (mem > fConfig->GetPPMaxMem())
1365                                         {
1366                                                 TString tmp;
1367                                                 tmp.Form("Process exceeds maximum allowed memory (%d KB > %d KB). Killing...",
1368                                                         mem, fConfig->GetPPMaxMem());
1369                                                 Log("SHUTTLE", tmp);
1370                                                 Log(fCurrentDetector, tmp);
1371         
1372                                                 kill(pid, 9);
1373         
1374                                                 UpdateShuttleStatus(AliShuttleStatus::kPPOutOfMemory);
1375                                                 hasError = kTRUE;
1376         
1377                                                 gSystem->Sleep(1000);
1378                                         }
1379                                 }
1380                         }
1381
1382                         AliInfo(Form("In parent process of %d - %s: Client has terminated.",
1383                                                                 GetCurrentRun(), aDetector->GetName()));
1384
1385                         if (WIFEXITED(status))
1386                         {
1387                                 Int_t returnCode = WEXITSTATUS(status);
1388
1389                                 Log("SHUTTLE", Form("%s: the return code is %d", fCurrentDetector.Data(),
1390                                                                                 returnCode));
1391
1392                                 if (returnCode == 0) hasError = kTRUE;
1393                         }
1394                 }
1395                 else if (pid == 0)
1396                 {
1397                         // client
1398                         AliInfo(Form("In client process of %d - %s", GetCurrentRun(), aDetector->GetName()));
1399
1400                         AliInfo("Redirecting output...");
1401
1402                         if ((freopen(GetLogFileName(fCurrentDetector), "a", stdout)) == 0)
1403                         {
1404                                 Log("SHUTTLE", "Could not freopen stdout");
1405                         }
1406                         else
1407                         {
1408                                 fOutputRedirected = kTRUE;
1409                                 if ((dup2(fileno(stdout), fileno(stderr))) < 0)
1410                                         Log("SHUTTLE", "Could not redirect stderr");
1411                                 
1412                         }
1413                         
1414                         TString wd = gSystem->WorkingDirectory();
1415                         TString tmpDir = Form("%s/%s_process",GetShuttleTempDir(),fCurrentDetector.Data());
1416                         
1417                         gSystem->mkdir(tmpDir.Data());
1418                         gSystem->ChangeDirectory(tmpDir.Data());
1419                         
1420                         Bool_t success = ProcessCurrentDetector();
1421                         
1422                         gSystem->ChangeDirectory(wd.Data());
1423                         
1424                         gSystem->Exec(Form("rm -rf %s",tmpDir.Data()));
1425                         
1426                         if (success) // Preprocessor finished successfully!
1427                         { 
1428                                 // Update time_processed field in FXS DB
1429                                 if (UpdateTable() == kFALSE)
1430                                         Log("SHUTTLE", Form("Process - %s: Could not update FXS databases!", 
1431                                                         fCurrentDetector.Data()));
1432
1433                                 // Transfer the data from local storage to main storage (Grid)
1434                                 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
1435                                 if (StoreOCDB() == kFALSE)
1436                                 {
1437                                         AliInfo(Form("\n \t\t\t****** run %d - %s: STORAGE ERROR ****** \n\n",
1438                                                         GetCurrentRun(), aDetector->GetName()));
1439                                         UpdateShuttleStatus(AliShuttleStatus::kStoreError);
1440                                         success = kFALSE;
1441                                 } else {
1442                                         AliInfo(Form("\n \t\t\t****** run %d - %s: DONE ****** \n\n",
1443                                                         GetCurrentRun(), aDetector->GetName()));
1444                                         UpdateShuttleStatus(AliShuttleStatus::kDone);
1445                                         UpdateShuttleLogbook(fCurrentDetector, "DONE");
1446                                 }
1447                         }
1448
1449                         for (UInt_t iSys=0; iSys<3; iSys++)
1450                         {
1451                                 if (fFXSCalled[iSys]) fFXSlist[iSys].Clear();
1452                         }
1453
1454                         AliInfo(Form("Client process of %d - %s is exiting now with %d.",
1455                                                         GetCurrentRun(), aDetector->GetName(), success));
1456
1457                         // the client exits here
1458                         gSystem->Exit(success);
1459
1460                         AliError("We should never get here!!!");
1461                 }
1462         }
1463
1464         AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^* \n",
1465                                                         GetCurrentRun()));
1466
1467         //check if shuttle is done for this run, if so update logbook
1468         TObjArray checkEntryArray;
1469         checkEntryArray.SetOwner(1);
1470         TString whereClause = Form("where run=%d", GetCurrentRun());
1471         if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || checkEntryArray.GetEntries() == 0) {
1472                 Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!",
1473                                                 GetCurrentRun()));
1474                 return hasError == kFALSE;
1475         }
1476
1477         AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*>
1478                                                 (checkEntryArray.At(0));
1479
1480         if (checkEntry)
1481         {
1482                 if (checkEntry->IsDone())
1483                 {
1484                         Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook");
1485                         UpdateShuttleLogbook("shuttle_done");
1486                 }
1487                 else
1488                 {
1489                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1490                         {
1491                                 if (checkEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
1492                                 {
1493                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
1494                                                         checkEntry->GetRun(), GetDetName(iDet)));
1495                                         fFirstUnprocessed[iDet] = kFALSE;
1496                                 }
1497                         }
1498                 }
1499         }
1500
1501         // remove ML instance
1502         delete fMonaLisa;
1503         fMonaLisa = 0;
1504
1505         fLogbookEntry = 0;
1506
1507         return hasError == kFALSE;
1508 }
1509
1510 //______________________________________________________________________________________________
1511 Bool_t AliShuttle::ProcessCurrentDetector()
1512 {
1513         //
1514         // Makes data retrieval just for a specific detector (fCurrentDetector).
1515         // Threre should be a configuration for this detector.
1516
1517         AliInfo(Form("Retrieving values for %s, run %d", fCurrentDetector.Data(), GetCurrentRun()));
1518
1519         if (!CleanReferenceStorage(fCurrentDetector.Data()))
1520                 return kFALSE;
1521
1522         TMap* dcsMap = 0;
1523
1524         // call preprocessor
1525         AliPreprocessor* aPreprocessor =
1526                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
1527
1528         aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
1529
1530         Bool_t processDCS = aPreprocessor->ProcessDCS();
1531
1532         if (!processDCS)
1533         {
1534                 Log(fCurrentDetector, "The preprocessor requested to skip the retrieval of DCS values");
1535         }
1536         else if (fTestMode & kSkipDCS)
1537         {
1538                 Log(fCurrentDetector, "In TESTMODE - Skipping DCS processing!");
1539         } 
1540         else if (fTestMode & kErrorDCS)
1541         {
1542                 Log(fCurrentDetector, "In TESTMODE - Simulating DCS error");
1543                 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
1544                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1545                 return kFALSE;
1546         } else {
1547
1548                 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
1549
1550                 TString host(fConfig->GetDCSHost(fCurrentDetector));
1551                 Int_t port = fConfig->GetDCSPort(fCurrentDetector);
1552
1553                 if (fConfig->GetDCSAliases(fCurrentDetector)->GetEntries() > 0)
1554                 {
1555                         dcsMap = GetValueSet(host, port, fConfig->GetDCSAliases(fCurrentDetector), kAlias);
1556                         if (!dcsMap)
1557                         {
1558                                 Log(fCurrentDetector, "ProcessCurrentDetector - Error while retrieving DCS aliases");
1559                                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1560                                 return kFALSE;
1561                         }
1562                 }
1563                 
1564                 if (fConfig->GetDCSDataPoints(fCurrentDetector)->GetEntries() > 0)
1565                 {
1566                         TMap* dcsMap2 = GetValueSet(host, port, fConfig->GetDCSDataPoints(fCurrentDetector), kDP);
1567                         if (!dcsMap2)
1568                         {
1569                                 Log(fCurrentDetector, "ProcessCurrentDetector - Error while retrieving DCS data points");
1570                                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1571                                 if (dcsMap)
1572                                         delete dcsMap;
1573                                 return kFALSE;
1574                         }
1575                         
1576                         if (!dcsMap)
1577                         {
1578                                 dcsMap = dcsMap2;
1579                         }
1580                         else // merge
1581                         {
1582                                 TIter iter(dcsMap2);
1583                                 TObjString* key = 0;
1584                                 while ((key = (TObjString*) iter.Next()))
1585                                         dcsMap->Add(key, dcsMap2->GetValue(key->String()));
1586                                         
1587                                 dcsMap2->SetOwner(kFALSE);
1588                                 delete dcsMap2;
1589                         }
1590                 }
1591                 
1592         }
1593
1594         // still no map?
1595         if (!dcsMap)
1596                 dcsMap = new TMap;
1597         
1598         // DCS Archive DB processing successful. Call Preprocessor!
1599         UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
1600
1601         UInt_t returnValue = aPreprocessor->Process(dcsMap);
1602
1603         if (returnValue > 0) // Preprocessor error!
1604         {
1605                 Log(fCurrentDetector, Form("Preprocessor failed. Process returned %d.", returnValue));
1606                 UpdateShuttleStatus(AliShuttleStatus::kPPError);
1607                 dcsMap->DeleteAll();
1608                 delete dcsMap;
1609                 return kFALSE;
1610         }
1611         
1612         // preprocessor ok!
1613         UpdateShuttleStatus(AliShuttleStatus::kPPDone);
1614         Log(fCurrentDetector, Form("ProcessCurrentDetector - %s preprocessor returned success",
1615                                 fCurrentDetector.Data()));
1616
1617         dcsMap->DeleteAll();
1618         delete dcsMap;
1619
1620         return kTRUE;
1621 }
1622
1623 //______________________________________________________________________________________________
1624 Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
1625                 TObjArray& entries)
1626 {
1627         // Query DAQ's Shuttle logbook and fills detector status object.
1628         // Call QueryRunParameters to query DAQ logbook for run parameters.
1629         //
1630
1631         entries.SetOwner(1);
1632
1633         // check connection, in case connect
1634         if(!Connect(3)) return kFALSE;
1635
1636         TString sqlQuery;
1637         sqlQuery = Form("select * from %s %s order by run", fConfig->GetShuttlelbTable(), whereClause);
1638
1639         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
1640         if (!aResult) {
1641                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
1642                 return kFALSE;
1643         }
1644
1645         AliDebug(2,Form("Query = %s", sqlQuery.Data()));
1646
1647         if(aResult->GetRowCount() == 0) {
1648                 AliInfo("No entries in Shuttle Logbook match request");
1649                 delete aResult;
1650                 return kTRUE;
1651         }
1652
1653         // TODO Check field count!
1654         const UInt_t nCols = 23;
1655         if (aResult->GetFieldCount() != (Int_t) nCols) {
1656                 AliError("Invalid SQL result field number!");
1657                 delete aResult;
1658                 return kFALSE;
1659         }
1660
1661         TSQLRow* aRow;
1662         while ((aRow = aResult->Next())) {
1663                 TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
1664                 Int_t run = runString.Atoi();
1665
1666                 AliShuttleLogbookEntry *entry = QueryRunParameters(run);
1667                 if (!entry)
1668                         continue;
1669
1670                 // loop on detectors
1671                 for(UInt_t ii = 0; ii < nCols; ii++)
1672                         entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii));
1673
1674                 entries.AddLast(entry);
1675                 delete aRow;
1676         }
1677
1678         delete aResult;
1679         return kTRUE;
1680 }
1681
1682 //______________________________________________________________________________________________
1683 AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
1684 {
1685         //
1686         // Retrieve run parameters written in the DAQ logbook and sets them into AliShuttleLogbookEntry object
1687         //
1688
1689         // check connection, in case connect
1690         if (!Connect(3))
1691                 return 0;
1692
1693         TString sqlQuery;
1694         sqlQuery.Form("select * from %s where run=%d", fConfig->GetDAQlbTable(), run);
1695
1696         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
1697         if (!aResult) {
1698                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
1699                 return 0;
1700         }
1701
1702         if (aResult->GetRowCount() == 0) {
1703                 Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run));
1704                 delete aResult;
1705                 return 0;
1706         }
1707
1708         if (aResult->GetRowCount() > 1) {
1709                 AliError(Form("More than one entry in DAQ Logbook for run %d. Skipping", run));
1710                 delete aResult;
1711                 return 0;
1712         }
1713
1714         TSQLRow* aRow = aResult->Next();
1715         if (!aRow)
1716         {
1717                 AliError(Form("Could not retrieve row for run %d. Skipping", run));
1718                 delete aResult;
1719                 return 0;
1720         }
1721
1722         AliShuttleLogbookEntry* entry = new AliShuttleLogbookEntry(run);
1723
1724         for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
1725                 entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
1726
1727         UInt_t startTime = entry->GetStartTime();
1728         UInt_t endTime = entry->GetEndTime();
1729
1730         if (!startTime || !endTime || startTime > endTime) {
1731                 Log("SHUTTLE",
1732                         Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d",
1733                                 run, startTime, endTime));
1734                 delete entry;
1735                 delete aRow;
1736                 delete aResult;
1737                 return 0;
1738         }
1739
1740         delete aRow;
1741         delete aResult;
1742
1743         return entry;
1744 }
1745
1746 //______________________________________________________________________________________________
1747 Bool_t AliShuttle::GetValueSet(const char* host, Int_t port, const char* entry,
1748                                 TObjArray* valueSet, DCSType type)
1749 {
1750         // Retrieve all "entry" data points from the DCS server
1751         // host, port: TSocket connection parameters
1752         // entry: name of the alias or data point
1753         // valueSet: array of retrieved AliDCSValue's
1754         // type: kAlias or kDP
1755
1756         AliDCSClient client(host, port, fTimeout, fRetries);
1757         if (!client.IsConnected())
1758         {
1759                 return kFALSE;
1760         }
1761
1762         Int_t result=0;
1763
1764         if (type == kAlias)
1765         {
1766                 result = client.GetAliasValues(entry,
1767                         GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1768         } else
1769         if (type == kDP)
1770         {
1771                 result = client.GetDPValues(entry,
1772                         GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1773         }
1774
1775         if (result < 0)
1776         {
1777                 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get '%s'! Reason: %s",
1778                         entry, AliDCSClient::GetErrorString(result)));
1779
1780                 if (result == AliDCSClient::fgkServerError)
1781                 {
1782                         Log(fCurrentDetector.Data(), Form("GetValueSet - Server error: %s",
1783                                 client.GetServerError().Data()));
1784                 }
1785
1786                 return kFALSE;
1787         }
1788
1789         return kTRUE;
1790 }
1791
1792 //______________________________________________________________________________________________
1793 TMap* AliShuttle::GetValueSet(const char* host, Int_t port, const TSeqCollection* entries,
1794                               DCSType type)
1795 {
1796         // Retrieve all "entry" data points from the DCS server
1797         // host, port: TSocket connection parameters
1798         // entries: list of name of the alias or data point
1799         // type: kAlias or kDP
1800         // returns TMap of values, 0 when failure
1801
1802         const Int_t kSplit = 100; // maximum number of DPs at a time
1803         
1804         Int_t totalEntries = entries->GetEntries();
1805         
1806         TMap* result = 0;
1807         
1808         for (Int_t index=0; index < totalEntries; index += kSplit)
1809         {
1810                 Int_t endIndex = index + kSplit;
1811         
1812                 AliDCSClient client(host, port, fTimeout, fRetries);
1813                 if (!client.IsConnected())
1814                         return 0;
1815
1816                 TMap* partialResult = 0;
1817
1818                 if (type == kAlias)
1819                 {
1820                         partialResult = client.GetAliasValues(entries, GetCurrentStartTime(), 
1821                                 GetCurrentEndTime(), index, endIndex);
1822                 } 
1823                 else if (type == kDP)
1824                 {
1825                         partialResult = client.GetDPValues(entries, GetCurrentStartTime(), 
1826                                 GetCurrentEndTime(), index, endIndex);
1827                 }
1828
1829                 if (partialResult == 0)
1830                 {
1831                         Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get entries (%d...%d)! Reason: %s",
1832                                 index, endIndex, client.GetServerError().Data()));
1833         
1834                         if (result)
1835                                 delete result;
1836                                 
1837                         return 0;
1838                 }
1839                 
1840                 AliInfo(Form("Retrieved entries %d..%d (total %d); E.g. %s has %d values collected",
1841                                         index, endIndex, totalEntries, entries->At(index)->GetName(), ((TObjArray*)
1842                                         partialResult->GetValue(entries->At(index)->GetName()))->GetEntriesFast()));
1843                 
1844                 if (!result)
1845                 {
1846                         result = partialResult;
1847                 }
1848                 else
1849                 {               
1850                         TIter iter(partialResult);
1851                         TObjString* key = 0;
1852                         while ((key = (TObjString*) iter.Next()))
1853                                 result->Add(key, partialResult->GetValue(key->String()));
1854                                 
1855                         partialResult->SetOwner(kFALSE);
1856                         delete partialResult;
1857                 }
1858         
1859         }
1860
1861         return result;
1862 }
1863 //______________________________________________________________________________________________
1864 const char* AliShuttle::GetFile(Int_t system, const char* detector,
1865                 const char* id, const char* source)
1866 {
1867         // Get calibration file from file exchange servers
1868         // First queris the FXS database for the file name, using the run, detector, id and source info
1869         // then calls RetrieveFile(filename) for actual copy to local disk
1870         // run: current run being processed (given by Logbook entry fLogbookEntry)
1871         // detector: the Preprocessor name
1872         // id: provided as a parameter by the Preprocessor
1873         // source: provided by the Preprocessor through GetFileSources function
1874
1875         // check if test mode should simulate a FXS error
1876         if (fTestMode & kErrorFXSFiles)
1877         {
1878                 Log(detector, Form("GetFile - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
1879                 return 0;
1880         }
1881         
1882         // check connection, in case connect
1883         if (!Connect(system))
1884         {
1885                 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
1886                 return 0;
1887         }
1888
1889         // Query preparation
1890         TString sourceName(source);
1891         Int_t nFields = 3;
1892         TString sqlQueryStart = Form("select filePath,size,fileChecksum from %s where",
1893                                                                 fConfig->GetFXSdbTable(system));
1894         TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1895                                                                 GetCurrentRun(), detector, id);
1896
1897         if (system == kDAQ)
1898         {
1899                 whereClause += Form(" and DAQsource=\"%s\"", source);
1900         }
1901         else if (system == kDCS)
1902         {
1903                 sourceName="none";
1904         }
1905         else if (system == kHLT)
1906         {
1907                 whereClause += Form(" and DDLnumbers=\"%s\"", source);
1908                 nFields = 3;
1909         }
1910
1911         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1912
1913         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1914
1915         // Query execution
1916         TSQLResult* aResult = 0;
1917         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
1918         if (!aResult) {
1919                 Log(detector, Form("GetFileName - Can't execute SQL query to %s database for: id = %s, source = %s",
1920                                 GetSystemName(system), id, sourceName.Data()));
1921                 return 0;
1922         }
1923
1924         if(aResult->GetRowCount() == 0)
1925         {
1926                 Log(detector,
1927                         Form("GetFileName - No entry in %s FXS db for: id = %s, source = %s",
1928                                 GetSystemName(system), id, sourceName.Data()));
1929                 delete aResult;
1930                 return 0;
1931         }
1932
1933         if (aResult->GetRowCount() > 1) {
1934                 Log(detector,
1935                         Form("GetFileName - More than one entry in %s FXS db for: id = %s, source = %s",
1936                                 GetSystemName(system), id, sourceName.Data()));
1937                 delete aResult;
1938                 return 0;
1939         }
1940
1941         if (aResult->GetFieldCount() != nFields) {
1942                 Log(detector,
1943                         Form("GetFileName - Wrong field count in %s FXS db for: id = %s, source = %s",
1944                                 GetSystemName(system), id, sourceName.Data()));
1945                 delete aResult;
1946                 return 0;
1947         }
1948
1949         TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
1950
1951         if (!aRow){
1952                 Log(detector, Form("GetFileName - Empty set result in %s FXS db from query: id = %s, source = %s",
1953                                 GetSystemName(system), id, sourceName.Data()));
1954                 delete aResult;
1955                 return 0;
1956         }
1957
1958         TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
1959         TString fileSize(aRow->GetField(1), aRow->GetFieldLength(1));
1960         TString fileChecksum(aRow->GetField(2), aRow->GetFieldLength(2));
1961
1962         delete aResult;
1963         delete aRow;
1964
1965         AliDebug(2, Form("filePath = %s; size = %s, fileChecksum = %s",
1966                                 filePath.Data(), fileSize.Data(), fileChecksum.Data()));
1967
1968         // retrieved file is renamed to make it unique
1969         TString localFileName = Form("%s_%s_%d_%s_%s.shuttle",
1970                                         GetSystemName(system), detector, GetCurrentRun(), id, sourceName.Data());
1971
1972
1973         // file retrieval from FXS
1974         UInt_t nRetries = 0;
1975         UInt_t maxRetries = 3;
1976         Bool_t result = kFALSE;
1977
1978         // copy!! if successful TSystem::Exec returns 0
1979         while(nRetries++ < maxRetries) {
1980                 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
1981                 result = RetrieveFile(system, filePath.Data(), localFileName.Data());
1982                 if(!result)
1983                 {
1984                         Log(detector, Form("GetFileName - Copy of file %s from %s FXS failed",
1985                                         filePath.Data(), GetSystemName(system)));
1986                         continue;
1987                 } else {
1988                         AliInfo(Form("File %s copied from %s FXS into %s/%s",
1989                                                 filePath.Data(), GetSystemName(system),
1990                                                 GetShuttleTempDir(), localFileName.Data()));
1991                 }
1992
1993                 if (fileChecksum.Length()>0)
1994                 {
1995                         // compare md5sum of local file with the one stored in the FXS DB
1996                         Int_t md5Comp = gSystem->Exec(Form("md5sum %s/%s |grep %s 2>&1 > /dev/null",
1997                                                 GetShuttleTempDir(), localFileName.Data(), fileChecksum.Data()));
1998
1999                         if (md5Comp != 0)
2000                         {
2001                                 Log(detector, Form("GetFileName - md5sum of file %s does not match with local copy!",
2002                                                         filePath.Data()));
2003                                 result = kFALSE;
2004                                 continue;
2005                         }
2006                 } else {
2007                         Log(fCurrentDetector, Form("GetFile - md5sum of file %s not set in %s database, skipping comparison",
2008                                                         filePath.Data(), GetSystemName(system)));
2009                 }
2010                 if (result) break;
2011         }
2012
2013         if(!result) return 0;
2014
2015         fFXSCalled[system]=kTRUE;
2016         TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, sourceName.Data()));
2017         fFXSlist[system].Add(fileParams);
2018
2019         static TString fullLocalFileName;
2020         fullLocalFileName = TString::Format("%s/%s", GetShuttleTempDir(), localFileName.Data());
2021
2022         AliInfo(Form("fullLocalFileName = %s", fullLocalFileName.Data()));
2023
2024         return fullLocalFileName.Data();
2025
2026 }
2027
2028 //______________________________________________________________________________________________
2029 Bool_t AliShuttle::RetrieveFile(UInt_t system, const char* fxsFileName, const char* localFileName)
2030 {
2031         //
2032         // Copies file from FXS to local Shuttle machine
2033         //
2034
2035         // check temp directory: trying to cd to temp; if it does not exist, create it
2036         AliDebug(2, Form("Copy file %s from %s FXS into %s/%s",
2037                         GetSystemName(system), fxsFileName, GetShuttleTempDir(), localFileName));
2038
2039         void* dir = gSystem->OpenDirectory(GetShuttleTempDir());
2040         if (dir == NULL) {
2041                 if (gSystem->mkdir(GetShuttleTempDir(), kTRUE)) {
2042                         AliError(Form("Can't open directory <%s>", GetShuttleTempDir()));
2043                         return kFALSE;
2044                 }
2045
2046         } else {
2047                 gSystem->FreeDirectory(dir);
2048         }
2049
2050         TString baseFXSFolder;
2051         if (system == kDAQ)
2052         {
2053                 baseFXSFolder = "FES/";
2054         }
2055         else if (system == kDCS)
2056         {
2057                 baseFXSFolder = "";
2058         }
2059         else if (system == kHLT)
2060         {
2061                 baseFXSFolder = "/opt/FXS";
2062         }
2063
2064
2065         TString command = Form("scp -oPort=%d -2 %s@%s:%s%s %s/%s",
2066                 fConfig->GetFXSPort(system),
2067                 fConfig->GetFXSUser(system),
2068                 fConfig->GetFXSHost(system),
2069                 baseFXSFolder.Data(),
2070                 fxsFileName,
2071                 GetShuttleTempDir(),
2072                 localFileName);
2073
2074         AliDebug(2, Form("%s",command.Data()));
2075
2076         Bool_t result = (gSystem->Exec(command.Data()) == 0);
2077
2078         return result;
2079 }
2080
2081 //______________________________________________________________________________________________
2082 TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
2083 {
2084         //
2085         // Get sources producing the condition file Id from file exchange servers
2086         // if id is NULL all sources are returned (distinct)
2087         //
2088         
2089         // check if test mode should simulate a FXS error
2090         if (fTestMode & kErrorFXSSources)
2091         {
2092                 Log(detector, Form("GetFileSources - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2093                 return 0;
2094         }
2095
2096
2097         if (system == kDCS)
2098         {
2099                 AliWarning("DCS system has only one source of data!");
2100                 TList *list = new TList();
2101                 list->SetOwner(1);
2102                 list->Add(new TObjString(" "));
2103                 return list;
2104         }
2105
2106         // check connection, in case connect
2107         if (!Connect(system))
2108         {
2109                 Log(detector, Form("GetFileSources - Couldn't connect to %s FXS database", GetSystemName(system)));
2110                 return NULL;
2111         }
2112
2113         TString sourceName = 0;
2114         if (system == kDAQ)
2115         {
2116                 sourceName = "DAQsource";
2117         } else if (system == kHLT)
2118         {
2119                 sourceName = "DDLnumbers";
2120         }
2121
2122         TString sqlQueryStart = Form("select distinct %s from %s where", sourceName.Data(), fConfig->GetFXSdbTable(system));
2123         TString whereClause = Form("run=%d and detector=\"%s\"",
2124                                 GetCurrentRun(), detector);
2125         if (id)
2126                 whereClause += Form(" and fileId=\"%s\"", id);
2127         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2128
2129         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2130
2131         // Query execution
2132         TSQLResult* aResult;
2133         aResult = fServer[system]->Query(sqlQuery);
2134         if (!aResult) {
2135                 Log(detector, Form("GetFileSources - Can't execute SQL query to %s database for id: %s",
2136                                 GetSystemName(system), id));
2137                 return 0;
2138         }
2139
2140         TList *list = new TList();
2141         list->SetOwner(1);
2142         
2143         if (aResult->GetRowCount() == 0)
2144         {
2145                 Log(detector,
2146                         Form("GetFileSources - No entry in %s FXS table for id: %s", GetSystemName(system), id));
2147                 delete aResult;
2148                 return list;
2149         }
2150
2151         TSQLRow* aRow;
2152
2153         while ((aRow = aResult->Next()))
2154         {
2155
2156                 TString source(aRow->GetField(0), aRow->GetFieldLength(0));
2157                 AliDebug(2, Form("%s = %s", sourceName.Data(), source.Data()));
2158                 list->Add(new TObjString(source));
2159                 delete aRow;
2160         }
2161
2162         delete aResult;
2163
2164         return list;
2165 }
2166
2167 //______________________________________________________________________________________________
2168 TList* AliShuttle::GetFileIDs(Int_t system, const char* detector, const char* source)
2169 {
2170         //
2171         // Get all ids of condition files produced by a given source from file exchange servers
2172         //
2173         
2174         // check if test mode should simulate a FXS error
2175         if (fTestMode & kErrorFXSSources)
2176         {
2177                 Log(detector, Form("GetFileIDs - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2178                 return 0;
2179         }
2180
2181         // check connection, in case connect
2182         if (!Connect(system))
2183         {
2184                 Log(detector, Form("GetFileIDs - Couldn't connect to %s FXS database", GetSystemName(system)));
2185                 return NULL;
2186         }
2187
2188         TString sourceName = 0;
2189         if (system == kDAQ)
2190         {
2191                 sourceName = "DAQsource";
2192         } else if (system == kHLT)
2193         {
2194                 sourceName = "DDLnumbers";
2195         }
2196
2197         TString sqlQueryStart = Form("select fileId from %s where", fConfig->GetFXSdbTable(system));
2198         TString whereClause = Form("run=%d and detector=\"%s\"",
2199                                 GetCurrentRun(), detector);
2200         if (sourceName.Length() > 0 && source)
2201                 whereClause += Form(" and %s=\"%s\"", sourceName.Data(), source);
2202         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2203
2204         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2205
2206         // Query execution
2207         TSQLResult* aResult;
2208         aResult = fServer[system]->Query(sqlQuery);
2209         if (!aResult) {
2210                 Log(detector, Form("GetFileIDs - Can't execute SQL query to %s database for source: %s",
2211                                 GetSystemName(system), source));
2212                 return 0;
2213         }
2214
2215         TList *list = new TList();
2216         list->SetOwner(1);
2217         
2218         if (aResult->GetRowCount() == 0)
2219         {
2220                 Log(detector,
2221                         Form("GetFileIDs - No entry in %s FXS table for source: %s", GetSystemName(system), source));
2222                 delete aResult;
2223                 return list;
2224         }
2225
2226         TSQLRow* aRow;
2227
2228         while ((aRow = aResult->Next()))
2229         {
2230
2231                 TString id(aRow->GetField(0), aRow->GetFieldLength(0));
2232                 AliDebug(2, Form("fileId = %s", id.Data()));
2233                 list->Add(new TObjString(id));
2234                 delete aRow;
2235         }
2236
2237         delete aResult;
2238
2239         return list;
2240 }
2241
2242 //______________________________________________________________________________________________
2243 Bool_t AliShuttle::Connect(Int_t system)
2244 {
2245         // Connect to MySQL Server of the system's FXS MySQL databases
2246         // DAQ Logbook, Shuttle Logbook and DAQ FXS db are on the same host
2247         //
2248
2249         // check connection: if already connected return
2250         if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
2251
2252         TString dbHost, dbUser, dbPass, dbName;
2253
2254         if (system < 3) // FXS db servers
2255         {
2256                 dbHost = Form("mysql://%s:%d", fConfig->GetFXSdbHost(system), fConfig->GetFXSdbPort(system));
2257                 dbUser = fConfig->GetFXSdbUser(system);
2258                 dbPass = fConfig->GetFXSdbPass(system);
2259                 dbName =   fConfig->GetFXSdbName(system);
2260         } else { // Run & Shuttle logbook servers
2261         // TODO Will the Shuttle logbook server be the same as the Run logbook server ???
2262                 dbHost = Form("mysql://%s:%d", fConfig->GetDAQlbHost(), fConfig->GetDAQlbPort());
2263                 dbUser = fConfig->GetDAQlbUser();
2264                 dbPass = fConfig->GetDAQlbPass();
2265                 dbName =   fConfig->GetDAQlbDB();
2266         }
2267
2268         fServer[system] = TSQLServer::Connect(dbHost.Data(), dbUser.Data(), dbPass.Data());
2269         if (!fServer[system] || !fServer[system]->IsConnected()) {
2270                 if(system < 3)
2271                 {
2272                 AliError(Form("Can't establish connection to FXS database for %s",
2273                                         AliShuttleInterface::GetSystemName(system)));
2274                 } else {
2275                 AliError("Can't establish connection to Run logbook.");
2276                 }
2277                 if(fServer[system]) delete fServer[system];
2278                 return kFALSE;
2279         }
2280
2281         // Get tables
2282         TSQLResult* aResult=0;
2283         switch(system){
2284                 case kDAQ:
2285                         aResult = fServer[kDAQ]->GetTables(dbName.Data());
2286                         break;
2287                 case kDCS:
2288                         aResult = fServer[kDCS]->GetTables(dbName.Data());
2289                         break;
2290                 case kHLT:
2291                         aResult = fServer[kHLT]->GetTables(dbName.Data());
2292                         break;
2293                 default:
2294                         aResult = fServer[3]->GetTables(dbName.Data());
2295                         break;
2296         }
2297
2298         delete aResult;
2299         return kTRUE;
2300 }
2301
2302 //______________________________________________________________________________________________
2303 Bool_t AliShuttle::UpdateTable()
2304 {
2305         //
2306         // Update FXS table filling time_processed field in all rows corresponding to current run and detector
2307         //
2308
2309         Bool_t result = kTRUE;
2310
2311         for (UInt_t system=0; system<3; system++)
2312         {
2313                 if(!fFXSCalled[system]) continue;
2314
2315                 // check connection, in case connect
2316                 if (!Connect(system))
2317                 {
2318                         Log(fCurrentDetector, Form("UpdateTable - Couldn't connect to %s FXS database", GetSystemName(system)));
2319                         result = kFALSE;
2320                         continue;
2321                 }
2322
2323                 TTimeStamp now; // now
2324
2325                 // Loop on FXS list entries
2326                 TIter iter(&fFXSlist[system]);
2327                 TObjString *aFXSentry=0;
2328                 while ((aFXSentry = dynamic_cast<TObjString*> (iter.Next())))
2329                 {
2330                         TString aFXSentrystr = aFXSentry->String();
2331                         TObjArray *aFXSarray = aFXSentrystr.Tokenize("#!?!#");
2332                         if (!aFXSarray || aFXSarray->GetEntries() != 2 )
2333                         {
2334                                 Log(fCurrentDetector, Form("UpdateTable - error updating %s FXS entry. Check string: <%s>",
2335                                         GetSystemName(system), aFXSentrystr.Data()));
2336                                 if(aFXSarray) delete aFXSarray;
2337                                 result = kFALSE;
2338                                 continue;
2339                         }
2340                         const char* fileId = ((TObjString*) aFXSarray->At(0))->GetName();
2341                         const char* source = ((TObjString*) aFXSarray->At(1))->GetName();
2342
2343                         TString whereClause;
2344                         if (system == kDAQ)
2345                         {
2346                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
2347                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
2348                         }
2349                         else if (system == kDCS)
2350                         {
2351                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\";",
2352                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId);
2353                         }
2354                         else if (system == kHLT)
2355                         {
2356                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\";",
2357                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
2358                         }
2359
2360                         delete aFXSarray;
2361
2362                         TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
2363                                                                 now.GetSec(), whereClause.Data());
2364
2365                         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2366
2367                         // Query execution
2368                         TSQLResult* aResult;
2369                         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2370                         if (!aResult)
2371                         {
2372                                 Log(fCurrentDetector, Form("UpdateTable - %s db: can't execute SQL query <%s>",
2373                                                                 GetSystemName(system), sqlQuery.Data()));
2374                                 result = kFALSE;
2375                                 continue;
2376                         }
2377                         delete aResult;
2378                 }
2379         }
2380
2381         return result;
2382 }
2383
2384 //______________________________________________________________________________________________
2385 Bool_t AliShuttle::UpdateTableFailCase()
2386 {
2387         // Update FXS table filling time_processed field in all rows corresponding to current run and detector
2388         // this is called in case the preprocessor is declared failed for the current run, because
2389         // the fields are updated only in case of success
2390
2391         Bool_t result = kTRUE;
2392
2393         for (UInt_t system=0; system<3; system++)
2394         {
2395                 // check connection, in case connect
2396                 if (!Connect(system))
2397                 {
2398                         Log(fCurrentDetector, Form("UpdateTableFailCase - Couldn't connect to %s FXS database",
2399                                                         GetSystemName(system)));
2400                         result = kFALSE;
2401                         continue;
2402                 }
2403
2404                 TTimeStamp now; // now
2405
2406                 // Loop on FXS list entries
2407
2408                 TString whereClause = Form("where run=%d and detector=\"%s\";",
2409                                                 GetCurrentRun(), fCurrentDetector.Data());
2410
2411
2412                 TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
2413                                                         now.GetSec(), whereClause.Data());
2414
2415                 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2416
2417                 // Query execution
2418                 TSQLResult* aResult;
2419                 aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2420                 if (!aResult)
2421                 {
2422                         Log(fCurrentDetector, Form("UpdateTableFailCase - %s db: can't execute SQL query <%s>",
2423                                                         GetSystemName(system), sqlQuery.Data()));
2424                         result = kFALSE;
2425                         continue;
2426                 }
2427                 delete aResult;
2428         }
2429
2430         return result;
2431 }
2432
2433 //______________________________________________________________________________________________
2434 Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
2435 {
2436         //
2437         // Update Shuttle logbook filling detector or shuttle_done column
2438         // ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
2439         //
2440
2441         // check connection, in case connect
2442         if(!Connect(3)){
2443                 Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook.");
2444                 return kFALSE;
2445         }
2446
2447         TString detName(detector);
2448         TString setClause;
2449         if(detName == "shuttle_done")
2450         {
2451                 setClause = "set shuttle_done=1";
2452
2453                 // Send the information to ML
2454                 TMonaLisaText  mlStatus("SHUTTLE_status", "Done");
2455
2456                 TList mlList;
2457                 mlList.Add(&mlStatus);
2458
2459                 fMonaLisa->SendParameters(&mlList);
2460         } else {
2461                 TString statusStr(status);
2462                 if(statusStr.Contains("done", TString::kIgnoreCase) ||
2463                    statusStr.Contains("failed", TString::kIgnoreCase)){
2464                         setClause = Form("set %s=\"%s\"", detector, status);
2465                 } else {
2466                         Log("SHUTTLE",
2467                                 Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s",
2468                                         status, detector));
2469                         return kFALSE;
2470                 }
2471         }
2472
2473         TString whereClause = Form("where run=%d", GetCurrentRun());
2474
2475         TString sqlQuery = Form("update %s %s %s",
2476                                         fConfig->GetShuttlelbTable(), setClause.Data(), whereClause.Data());
2477
2478         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2479
2480         // Query execution
2481         TSQLResult* aResult;
2482         aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
2483         if (!aResult) {
2484                 Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data()));
2485                 return kFALSE;
2486         }
2487         delete aResult;
2488
2489         return kTRUE;
2490 }
2491
2492 //______________________________________________________________________________________________
2493 Int_t AliShuttle::GetCurrentRun() const
2494 {
2495         //
2496         // Get current run from logbook entry
2497         //
2498
2499         return fLogbookEntry ? fLogbookEntry->GetRun() : -1;
2500 }
2501
2502 //______________________________________________________________________________________________
2503 UInt_t AliShuttle::GetCurrentStartTime() const
2504 {
2505         //
2506         // get current start time
2507         //
2508
2509         return fLogbookEntry ? fLogbookEntry->GetStartTime() : 0;
2510 }
2511
2512 //______________________________________________________________________________________________
2513 UInt_t AliShuttle::GetCurrentEndTime() const
2514 {
2515         //
2516         // get current end time from logbook entry
2517         //
2518
2519         return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
2520 }
2521
2522 //______________________________________________________________________________________________
2523 void AliShuttle::Log(const char* detector, const char* message)
2524 {
2525         //
2526         // Fill log string with a message
2527         //
2528
2529         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
2530         if (dir == NULL) {
2531                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE)) {
2532                         AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
2533                         return;
2534                 }
2535
2536         } else {
2537                 gSystem->FreeDirectory(dir);
2538         }
2539
2540         TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
2541         if (GetCurrentRun() >= 0) 
2542                 toLog += Form("run %d - ", GetCurrentRun());
2543         toLog += Form("%s", message);
2544
2545         AliInfo(toLog.Data());
2546         
2547         // if we redirect the log output already to the file, leave here
2548         if (fOutputRedirected && strcmp(detector, "SHUTTLE") != 0)
2549                 return;
2550
2551         TString fileName = GetLogFileName(detector);
2552         
2553         gSystem->ExpandPathName(fileName);
2554
2555         ofstream logFile;
2556         logFile.open(fileName, ofstream::out | ofstream::app);
2557
2558         if (!logFile.is_open()) {
2559                 AliError(Form("Could not open file %s", fileName.Data()));
2560                 return;
2561         }
2562
2563         logFile << toLog.Data() << "\n";
2564
2565         logFile.close();
2566 }
2567
2568 //______________________________________________________________________________________________
2569 TString AliShuttle::GetLogFileName(const char* detector) const
2570 {
2571         // 
2572         // returns the name of the log file for a given sub detector
2573         //
2574         
2575         TString fileName;
2576         
2577         if (GetCurrentRun() >= 0) 
2578                 fileName.Form("%s/%s_%d.log", GetShuttleLogDir(), detector, GetCurrentRun());
2579         else
2580                 fileName.Form("%s/%s.log", GetShuttleLogDir(), detector);
2581
2582         return fileName;
2583 }
2584
2585 //______________________________________________________________________________________________
2586 Bool_t AliShuttle::Collect(Int_t run)
2587 {
2588         //
2589         // Collects conditions data for all UNPROCESSED run written to DAQ LogBook in case of run = -1 (default)
2590         // If a dedicated run is given this run is processed
2591         //
2592         // In operational mode, this is the Shuttle function triggered by the EOR signal.
2593         //
2594
2595         if (run == -1)
2596                 Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs");
2597         else
2598                 Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run));
2599
2600         SetLastAction("Starting");
2601
2602         TString whereClause("where shuttle_done=0");
2603         if (run != -1)
2604                 whereClause += Form(" and run=%d", run);
2605
2606         TObjArray shuttleLogbookEntries;
2607         if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries))
2608         {
2609                 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
2610                 return kFALSE;
2611         }
2612
2613         if (shuttleLogbookEntries.GetEntries() == 0)
2614         {
2615                 if (run == -1)
2616                         Log("SHUTTLE","Collect - Found no UNPROCESSED runs in Shuttle logbook");
2617                 else
2618                         Log("SHUTTLE", Form("Collect - Run %d is already DONE "
2619                                                 "or it does not exist in Shuttle logbook", run));
2620                 return kTRUE;
2621         }
2622
2623         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
2624                 fFirstUnprocessed[iDet] = kTRUE;
2625
2626         if (run != -1)
2627         {
2628                 // query Shuttle logbook for earlier runs, check if some detectors are unprocessed,
2629                 // flag them into fFirstUnprocessed array
2630                 TString whereClause(Form("where shuttle_done=0 and run < %d", run));
2631                 TObjArray tmpLogbookEntries;
2632                 if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries))
2633                 {
2634                         Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
2635                         return kFALSE;
2636                 }
2637
2638                 TIter iter(&tmpLogbookEntries);
2639                 AliShuttleLogbookEntry* anEntry = 0;
2640                 while ((anEntry = dynamic_cast<AliShuttleLogbookEntry*> (iter.Next())))
2641                 {
2642                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
2643                         {
2644                                 if (anEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
2645                                 {
2646                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
2647                                                         anEntry->GetRun(), GetDetName(iDet)));
2648                                         fFirstUnprocessed[iDet] = kFALSE;
2649                                 }
2650                         }
2651
2652                 }
2653
2654         }
2655
2656         if (!RetrieveConditionsData(shuttleLogbookEntries))
2657         {
2658                 Log("SHUTTLE", "Collect - Process of at least one run failed");
2659                 return kFALSE;
2660         }
2661
2662         Log("SHUTTLE", "Collect - Requested run(s) successfully processed");
2663         return kTRUE;
2664 }
2665
2666 //______________________________________________________________________________________________
2667 Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries)
2668 {
2669         //
2670         // Retrieve conditions data for all runs that aren't processed yet
2671         //
2672
2673         Bool_t hasError = kFALSE;
2674
2675         TIter iter(&dateEntries);
2676         AliShuttleLogbookEntry* anEntry;
2677
2678         while ((anEntry = (AliShuttleLogbookEntry*) iter.Next())){
2679                 if (!Process(anEntry)){
2680                         hasError = kTRUE;
2681                 }
2682
2683                 // clean SHUTTLE temp directory
2684                 TString filename = Form("%s/*.shuttle", GetShuttleTempDir());
2685                 RemoveFile(filename.Data());
2686         }
2687
2688         return hasError == kFALSE;
2689 }
2690
2691 //______________________________________________________________________________________________
2692 ULong_t AliShuttle::GetTimeOfLastAction() const
2693 {
2694         //
2695         // Gets time of last action
2696         //
2697
2698         ULong_t tmp;
2699
2700         fMonitoringMutex->Lock();
2701
2702         tmp = fLastActionTime;
2703
2704         fMonitoringMutex->UnLock();
2705
2706         return tmp;
2707 }
2708
2709 //______________________________________________________________________________________________
2710 const TString AliShuttle::GetLastAction() const
2711 {
2712         //
2713         // returns a string description of the last action
2714         //
2715
2716         TString tmp;
2717
2718         fMonitoringMutex->Lock();
2719         
2720         tmp = fLastAction;
2721         
2722         fMonitoringMutex->UnLock();
2723
2724         return tmp;
2725 }
2726
2727 //______________________________________________________________________________________________
2728 void AliShuttle::SetLastAction(const char* action)
2729 {
2730         //
2731         // updates the monitoring variables
2732         //
2733
2734         fMonitoringMutex->Lock();
2735
2736         fLastAction = action;
2737         fLastActionTime = time(0);
2738         
2739         fMonitoringMutex->UnLock();
2740 }
2741
2742 //______________________________________________________________________________________________
2743 const char* AliShuttle::GetRunParameter(const char* param)
2744 {
2745         //
2746         // returns run parameter read from DAQ logbook
2747         //
2748
2749         if(!fLogbookEntry) {
2750                 AliError("No logbook entry!");
2751                 return 0;
2752         }
2753
2754         return fLogbookEntry->GetRunParameter(param);
2755 }
2756
2757 //______________________________________________________________________________________________
2758 AliCDBEntry* AliShuttle::GetFromOCDB(const char* detector, const AliCDBPath& path)
2759 {
2760         //
2761         // returns object from OCDB valid for current run
2762         //
2763
2764         if (fTestMode & kErrorOCDB)
2765         {
2766                 Log(detector, "GetFromOCDB - In TESTMODE - Simulating error with OCDB");
2767                 return 0;
2768         }
2769         
2770         AliCDBStorage *sto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
2771         if (!sto)
2772         {
2773                 Log(detector, "GetFromOCDB - Cannot activate main OCDB for query!");
2774                 return 0;
2775         }
2776
2777         return dynamic_cast<AliCDBEntry*> (sto->Get(path, GetCurrentRun()));
2778 }
2779
2780 //______________________________________________________________________________________________
2781 Bool_t AliShuttle::SendMail()
2782 {
2783         //
2784         // sends a mail to the subdetector expert in case of preprocessor error
2785         //
2786         
2787         if (fTestMode != kNone)
2788                 return kTRUE;
2789
2790         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
2791         if (dir == NULL)
2792         {
2793                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
2794                 {
2795                         AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
2796                         return kFALSE;
2797                 }
2798
2799         } else {
2800                 gSystem->FreeDirectory(dir);
2801         }
2802
2803         TString bodyFileName;
2804         bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
2805         gSystem->ExpandPathName(bodyFileName);
2806
2807         ofstream mailBody;
2808         mailBody.open(bodyFileName, ofstream::out);
2809
2810         if (!mailBody.is_open())
2811         {
2812                 AliError(Form("Could not open mail body file %s", bodyFileName.Data()));
2813                 return kFALSE;
2814         }
2815
2816         TString to="";
2817         TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
2818         TObjString *anExpert=0;
2819         while ((anExpert = (TObjString*) iterExperts.Next()))
2820         {
2821                 to += Form("%s,", anExpert->GetName());
2822         }
2823         to.Remove(to.Length()-1);
2824         AliDebug(2, Form("to: %s",to.Data()));
2825
2826         if (to.IsNull()) {
2827                 AliInfo("List of detector responsibles not yet set!");
2828                 return kFALSE;
2829         }
2830
2831         TString cc="alberto.colla@cern.ch";
2832
2833         TString subject = Form("%s Shuttle preprocessor FAILED in run %d !",
2834                                 fCurrentDetector.Data(), GetCurrentRun());
2835         AliDebug(2, Form("subject: %s", subject.Data()));
2836
2837         TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
2838         body += Form("SHUTTLE just detected that your preprocessor "
2839                         "failed processing run %d!!\n\n", GetCurrentRun());
2840         body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n", fCurrentDetector.Data());
2841         body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
2842         body += Form("Find the %s log for the current run on \n\n"
2843                 "\thttp://pcalishuttle01.cern.ch:8880/logs/%s_%d.log \n\n", 
2844                 fCurrentDetector.Data(), fCurrentDetector.Data(), GetCurrentRun());
2845         body += Form("The last 10 lines of %s log file are following:\n\n");
2846
2847         AliDebug(2, Form("Body begin: %s", body.Data()));
2848
2849         mailBody << body.Data();
2850         mailBody.close();
2851         mailBody.open(bodyFileName, ofstream::out | ofstream::app);
2852
2853         TString logFileName = Form("%s/%s_%d.log", GetShuttleLogDir(), fCurrentDetector.Data(), GetCurrentRun());
2854         TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
2855         if (gSystem->Exec(tailCommand.Data()))
2856         {
2857                 mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
2858         }
2859
2860         TString endBody = Form("------------------------------------------------------\n\n");
2861         endBody += Form("In case of problems please contact the SHUTTLE core team.\n\n");
2862         endBody += "Please do not answer this message directly, it is automatically generated.\n\n";
2863         endBody += "Greetings,\n\n \t\t\tthe SHUTTLE\n";
2864
2865         AliDebug(2, Form("Body end: %s", endBody.Data()));
2866
2867         mailBody << endBody.Data();
2868
2869         mailBody.close();
2870
2871         // send mail!
2872         TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
2873                                                 subject.Data(),
2874                                                 cc.Data(),
2875                                                 to.Data(),
2876                                                 bodyFileName.Data());
2877         AliDebug(2, Form("mail command: %s", mailCommand.Data()));
2878
2879         Bool_t result = gSystem->Exec(mailCommand.Data());
2880
2881         return result == 0;
2882 }
2883
2884 //______________________________________________________________________________________________
2885 const char* AliShuttle::GetRunType()
2886 {
2887         //
2888         // returns run type read from "run type" logbook
2889         //
2890
2891         if(!fLogbookEntry) {
2892                 AliError("No logbook entry!");
2893                 return 0;
2894         }
2895
2896         return fLogbookEntry->GetRunType();
2897 }
2898
2899 //______________________________________________________________________________________________
2900 void AliShuttle::SetShuttleTempDir(const char* tmpDir)
2901 {
2902         //
2903         // sets Shuttle temp directory
2904         //
2905
2906         fgkShuttleTempDir = gSystem->ExpandPathName(tmpDir);
2907 }
2908
2909 //______________________________________________________________________________________________
2910 void AliShuttle::SetShuttleLogDir(const char* logDir)
2911 {
2912         //
2913         // sets Shuttle log directory
2914         //
2915
2916         fgkShuttleLogDir = gSystem->ExpandPathName(logDir);
2917 }