7aa1a4df1bd75724c2fc20d017c8f51e503e3e1d
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttle.cxx
1 /**************************************************************************
2  * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
3  *                                                                        *
4  * Author: The ALICE Off-line Project.                                    *
5  * Contributors are mentioned in the code where appropriate.              *
6  *                                                                        *
7  * Permission to use, copy, modify and distribute this software and its   *
8  * documentation strictly for non-commercial purposes is hereby granted   *
9  * without fee, provided that the above copyright notice appears in all   *
10  * copies and that both the copyright notice and this permission notice   *
11  * appear in the supporting documentation. The authors make no claims     *
12  * about the suitability of this software for any purpose. It is          *
13  * provided "as is" without express or implied warranty.                  *
14  **************************************************************************/
15
16 /*
17 $Log$
18 Revision 1.53  2007/07/12 09:26:28  jgrosseo
19 updating hlt fxs base path
20
21 Revision 1.52  2007/07/12 08:06:45  jgrosseo
22 adding log messages in getfile... functions
23 adding not implemented copy constructor in alishuttleconfigholder
24
25 Revision 1.51  2007/07/03 17:24:52  acolla
26 root moved to v5-16-00. TFileMerger->Cp moved to TFile::Cp.
27
28 Revision 1.50  2007/07/02 17:19:32  acolla
29 preprocessor is run in a temp directory that is removed when process is finished.
30
31 Revision 1.49  2007/06/29 10:45:06  acolla
32 Number of columns in MySql Shuttle logbook increased by one (HLT added)
33
34 Revision 1.48  2007/06/21 13:06:19  acolla
35 GetFileSources returns dummy list with 1 source if system=DCS (better than
36 returning error as it was)
37
38 Revision 1.47  2007/06/19 17:28:56  acolla
39 HLT updated; missing map bug removed.
40
41 Revision 1.46  2007/06/09 13:01:09  jgrosseo
42 Switching to retrieval of several DCS DPs at a time (multiDPrequest)
43
44 Revision 1.45  2007/05/30 06:35:20  jgrosseo
45 Adding functionality to the Shuttle/TestShuttle:
46 o) Function to retrieve list of sources from a given system (GetFileSources with id=0)
47 o) Function to retrieve list of IDs for a given source      (GetFileIDs)
48 These functions are needed for dealing with the tag files that are saved for the GRP preprocessor
49 Example code has been added to the TestProcessor in TestShuttle
50
51 Revision 1.44  2007/05/11 16:09:32  acolla
52 Reference files for ITS, MUON and PHOS are now stored in OfflineDetName/OnlineDetName/run_...
53 example: ITS/SPD/100_filename.root
54
55 Revision 1.43  2007/05/10 09:59:51  acolla
56 Various bug fixes in StoreRefFilesToGrid; Cleaning of reference storage before processing detector (CleanReferenceStorage)
57
58 Revision 1.42  2007/05/03 08:01:39  jgrosseo
59 typo in last commit :-(
60
61 Revision 1.41  2007/05/03 08:00:48  jgrosseo
62 fixing log message when pp want to skip dcs value retrieval
63
64 Revision 1.40  2007/04/27 07:06:48  jgrosseo
65 GetFileSources returns empty list in case of no files, but successful query
66 No mails sent in testmode
67
68 Revision 1.39  2007/04/17 12:43:57  acolla
69 Correction in StoreOCDB; change of text in mail to detector expert
70
71 Revision 1.38  2007/04/12 08:26:18  jgrosseo
72 updated comment
73
74 Revision 1.37  2007/04/10 16:53:14  jgrosseo
75 redirecting sub detector stdout, stderr to sub detector log file
76
77 Revision 1.35  2007/04/04 16:26:38  acolla
78 1. Re-organization of function calls in TestPreprocessor to make it more meaningful.
79 2. Added missing dependency in test preprocessors.
80 3. in AliShuttle.cxx: processing time and memory consumption info on a single line.
81
82 Revision 1.34  2007/04/04 10:33:36  jgrosseo
83 1) Storing of files to the Grid is now done _after_ your preprocessors succeeded. This is transparent, which means that you can still use the same functions (Store, StoreReferenceData) to store files to the Grid. However, the Shuttle first stores them locally and transfers them after the preprocessor finished. The return code of these two functions has changed from UInt_t to Bool_t which gives you the success of the storing.
84 In case of an error with the Grid, the Shuttle will retry the storing later, the preprocessor does not need to be run again.
85
86 2) The meaning of the return code of the preprocessor has changed. 0 is now success and any other value means failure. This value is stored in the log and you can use it to keep details about the error condition.
87
88 3) New function StoreReferenceFile to _directly_ store a file (without opening it) to the reference storage.
89
90 4) The memory usage of the preprocessor is monitored. If it exceeds 2 GB it is terminated.
91
92 5) New function AliPreprocessor::ProcessDCS(). If you do not need to have DCS data in all cases, you can skip the processing by implemting this function and returning kFALSE under certain conditions. E.g. if there is a certain run type.
93 If you always need DCS data (like before), you do not need to implement it.
94
95 6) The run type has been added to the monitoring page
96
97 Revision 1.33  2007/04/03 13:56:01  acolla
98 Grid Storage at the end of preprocessing. Added virtual method to disable DCS query according to the
99 run type.
100
101 Revision 1.32  2007/02/28 10:41:56  acolla
102 Run type field added in SHUTTLE framework. Run type is read from "run type" logbook and retrieved by
103 AliPreprocessor::GetRunType() function.
104 Added some ldap definition files.
105
106 Revision 1.30  2007/02/13 11:23:21  acolla
107 Moved getters and setters of Shuttle's main OCDB/Reference, local
108 OCDB/Reference, temp and log folders to AliShuttleInterface
109
110 Revision 1.27  2007/01/30 17:52:42  jgrosseo
111 adding monalisa monitoring
112
113 Revision 1.26  2007/01/23 19:20:03  acolla
114 Removed old ldif files, added TOF, MCH ldif files. Added some options in
115 AliShuttleConfig::Print. Added in Ali Shuttle: SetShuttleTempDir and
116 SetShuttleLogDir
117
118 Revision 1.25  2007/01/15 19:13:52  acolla
119 Moved some AliInfo to AliDebug in SendMail function
120
121 Revision 1.21  2006/12/07 08:51:26  jgrosseo
122 update (alberto):
123 table, db names in ldap configuration
124 added GRP preprocessor
125 DCS data can also be retrieved by data point
126
127 Revision 1.20  2006/11/16 16:16:48  jgrosseo
128 introducing strict run ordering flag
129 removed giving preprocessor name to preprocessor, they have to know their name themselves ;-)
130
131 Revision 1.19  2006/11/06 14:23:04  jgrosseo
132 major update (Alberto)
133 o) reading of run parameters from the logbook
134 o) online offline naming conversion
135 o) standalone DCSclient package
136
137 Revision 1.18  2006/10/20 15:22:59  jgrosseo
138 o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
139 o) Merging Collect, CollectAll, CollectNew function
140 o) Removing implementation of empty copy constructors (declaration still there!)
141
142 Revision 1.17  2006/10/05 16:20:55  jgrosseo
143 adapting to new CDB classes
144
145 Revision 1.16  2006/10/05 15:46:26  jgrosseo
146 applying to the new interface
147
148 Revision 1.15  2006/10/02 16:38:39  jgrosseo
149 update (alberto):
150 fixed memory leaks
151 storing of objects that failed to be stored to the grid before
152 interfacing of shuttle status table in daq system
153
154 Revision 1.14  2006/08/29 09:16:05  jgrosseo
155 small update
156
157 Revision 1.13  2006/08/15 10:50:00  jgrosseo
158 effc++ corrections (alberto)
159
160 Revision 1.12  2006/08/08 14:19:29  jgrosseo
161 Update to shuttle classes (Alberto)
162
163 - Possibility to set the full object's path in the Preprocessor's and
164 Shuttle's  Store functions
165 - Possibility to extend the object's run validity in the same classes
166 ("startValidity" and "validityInfinite" parameters)
167 - Implementation of the StoreReferenceData function to store reference
168 data in a dedicated CDB storage.
169
170 Revision 1.11  2006/07/21 07:37:20  jgrosseo
171 last run is stored after each run
172
173 Revision 1.10  2006/07/20 09:54:40  jgrosseo
174 introducing status management: The processing per subdetector is divided into several steps,
175 after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
176 can keep track of the number of failures and skips further processing after a certain threshold is
177 exceeded. These thresholds can be configured in LDAP.
178
179 Revision 1.9  2006/07/19 10:09:55  jgrosseo
180 new configuration, accesst to DAQ FES (Alberto)
181
182 Revision 1.8  2006/07/11 12:44:36  jgrosseo
183 adding parameters for extended validity range of data produced by preprocessor
184
185 Revision 1.7  2006/07/10 14:37:09  jgrosseo
186 small fix + todo comment
187
188 Revision 1.6  2006/07/10 13:01:41  jgrosseo
189 enhanced storing of last sucessfully processed run (alberto)
190
191 Revision 1.5  2006/07/04 14:59:57  jgrosseo
192 revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
193
194 Revision 1.4  2006/06/12 09:11:16  jgrosseo
195 coding conventions (Alberto)
196
197 Revision 1.3  2006/06/06 14:26:40  jgrosseo
198 o) removed files that were moved to STEER
199 o) shuttle updated to follow the new interface (Alberto)
200
201 Revision 1.2  2006/03/07 07:52:34  hristov
202 New version (B.Yordanov)
203
204 Revision 1.6  2005/11/19 17:19:14  byordano
205 RetrieveDATEEntries and RetrieveConditionsData added
206
207 Revision 1.5  2005/11/19 11:09:27  byordano
208 AliShuttle declaration added
209
210 Revision 1.4  2005/11/17 17:47:34  byordano
211 TList changed to TObjArray
212
213 Revision 1.3  2005/11/17 14:43:23  byordano
214 import to local CVS
215
216 Revision 1.1.1.1  2005/10/28 07:33:58  hristov
217 Initial import as subdirectory in AliRoot
218
219 Revision 1.2  2005/09/13 08:41:15  byordano
220 default startTime endTime added
221
222 Revision 1.4  2005/08/30 09:13:02  byordano
223 some docs added
224
225 Revision 1.3  2005/08/29 21:15:47  byordano
226 some docs added
227
228 */
229
230 //
231 // This class is the main manager for AliShuttle.
232 // It organizes the data retrieval from DCS and calls the
233 // interface methods of AliPreprocessor.
234 // For every detector in AliShuttleConfig (see AliShuttleConfig),
235 // data for its set of aliases is retrieved. If there is a registered
236 // AliPreprocessor for this detector then it will be used
237 // according to the schema (see AliPreprocessor).
238 // If there isn't a registered AliPreprocessor then the retrieved
239 // data is stored automatically to the underlying AliCDBStorage.
240 // The alias name is used as detSpec.
241 //
242
243 #include "AliShuttle.h"
244
245 #include "AliCDBManager.h"
246 #include "AliCDBStorage.h"
247 #include "AliCDBId.h"
248 #include "AliCDBRunRange.h"
249 #include "AliCDBPath.h"
250 #include "AliCDBEntry.h"
251 #include "AliShuttleConfig.h"
252 #include "DCSClient/AliDCSClient.h"
253 #include "AliLog.h"
254 #include "AliPreprocessor.h"
255 #include "AliShuttleStatus.h"
256 #include "AliShuttleLogbookEntry.h"
257
258 #include <TSystem.h>
259 #include <TObject.h>
260 #include <TString.h>
261 #include <TTimeStamp.h>
262 #include <TObjString.h>
263 #include <TSQLServer.h>
264 #include <TSQLResult.h>
265 #include <TSQLRow.h>
266 #include <TMutex.h>
267 #include <TSystemDirectory.h>
268 #include <TSystemFile.h>
269 #include <TFile.h>
270 #include <TFileMerger.h>
271 #include <TGrid.h>
272 #include <TGridResult.h>
273
274 #include <TMonaLisaWriter.h>
275
276 #include <fstream>
277
278 #include <sys/types.h>
279 #include <sys/wait.h>
280
281 ClassImp(AliShuttle)
282
283 //______________________________________________________________________________________________
284 AliShuttle::AliShuttle(const AliShuttleConfig* config,
285                 UInt_t timeout, Int_t retries):
286 fConfig(config),
287 fTimeout(timeout), fRetries(retries),
288 fPreprocessorMap(),
289 fLogbookEntry(0),
290 fCurrentDetector(),
291 fStatusEntry(0),
292 fMonitoringMutex(0),
293 fLastActionTime(0),
294 fLastAction(),
295 fMonaLisa(0),
296 fTestMode(kNone),
297 fReadTestMode(kFALSE),
298 fOutputRedirected(kFALSE)
299 {
300         //
301         // config: AliShuttleConfig used
302         // timeout: timeout used for AliDCSClient connection
303         // retries: the number of retries in case of connection error.
304         //
305
306         if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
307         for(int iSys=0;iSys<4;iSys++) {
308                 fServer[iSys]=0;
309                 if (iSys < 3)
310                         fFXSlist[iSys].SetOwner(kTRUE);
311         }
312         fPreprocessorMap.SetOwner(kTRUE);
313
314         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
315                 fFirstUnprocessed[iDet] = kFALSE;
316
317         fMonitoringMutex = new TMutex();
318 }
319
320 //______________________________________________________________________________________________
321 AliShuttle::~AliShuttle()
322 {
323         //
324         // destructor
325         //
326
327         fPreprocessorMap.DeleteAll();
328         for(int iSys=0;iSys<4;iSys++)
329                 if(fServer[iSys]) {
330                         fServer[iSys]->Close();
331                         delete fServer[iSys];
332                         fServer[iSys] = 0;
333                 }
334
335         if (fStatusEntry){
336                 delete fStatusEntry;
337                 fStatusEntry = 0;
338         }
339         
340         if (fMonitoringMutex) 
341         {
342                 delete fMonitoringMutex;
343                 fMonitoringMutex = 0;
344         }
345 }
346
347 //______________________________________________________________________________________________
348 void AliShuttle::RegisterPreprocessor(AliPreprocessor* preprocessor)
349 {
350         //
351         // Registers new AliPreprocessor.
352         // It uses GetName() for indentificator of the pre processor.
353         // The pre processor is registered it there isn't any other
354         // with the same identificator (GetName()).
355         //
356
357         const char* detName = preprocessor->GetName();
358         if(GetDetPos(detName) < 0)
359                 AliFatal(Form("********** !!!!! Invalid detector name: %s !!!!! **********", detName));
360
361         if (fPreprocessorMap.GetValue(detName)) {
362                 AliWarning(Form("AliPreprocessor %s is already registered!", detName));
363                 return;
364         }
365
366         fPreprocessorMap.Add(new TObjString(detName), preprocessor);
367 }
368 //______________________________________________________________________________________________
369 Bool_t AliShuttle::Store(const AliCDBPath& path, TObject* object,
370                 AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite)
371 {
372         // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for
373         // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored
374         // using this function. Use StoreReferenceData instead!
375         // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
376         // finishes the data are transferred to the main storage (Grid).
377
378         return StoreLocally(fgkLocalCDB, path, object, metaData, validityStart, validityInfinite);
379 }
380
381 //______________________________________________________________________________________________
382 Bool_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData)
383 {
384         // Stores a CDB object in the storage for reference data. This objects will not be available during
385         // offline reconstrunction. Use this function for reference data only!
386         // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
387         // finishes the data are transferred to the main storage (Grid).
388
389         return StoreLocally(fgkLocalRefStorage, path, object, metaData);
390 }
391
392 //______________________________________________________________________________________________
393 Bool_t AliShuttle::StoreLocally(const TString& localUri,
394                         const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
395                         Int_t validityStart, Bool_t validityInfinite)
396 {
397         // Store object temporarily in local storage. Parameters are passed by Store and StoreReferenceData functions.
398         // when the preprocessor finishes the data are transferred to the main storage (Grid).
399         // The parameters are:
400         //   1) Uri of the backup storage (Local)
401         //   2) the object's path.
402         //   3) the object to be stored
403         //   4) the metaData to be associated with the object
404         //   5) the validity start run number w.r.t. the current run,
405         //      if the data is valid only for this run leave the default 0
406         //   6) specifies if the calibration data is valid for infinity (this means until updated),
407         //      typical for calibration runs, the default is kFALSE
408         //
409         // returns 0 if fail, 1 otherwise
410
411         if (fTestMode & kErrorStorage)
412         {
413                 Log(fCurrentDetector, "StoreLocally - In TESTMODE - Simulating error while storing locally");
414                 return kFALSE;
415         }
416         
417         const char* cdbType = (localUri == fgkLocalCDB) ? "CDB" : "Reference";
418
419         Int_t firstRun = GetCurrentRun() - validityStart;
420         if(firstRun < 0) {
421                 AliWarning("First valid run happens to be less than 0! Setting it to 0.");
422                 firstRun=0;
423         }
424
425         Int_t lastRun = -1;
426         if(validityInfinite) {
427                 lastRun = AliCDBRunRange::Infinity();
428         } else {
429                 lastRun = GetCurrentRun();
430         }
431
432         // Version is set to current run, it will be used later to transfer data to Grid
433         AliCDBId id(path, firstRun, lastRun, GetCurrentRun(), -1);
434
435         if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){
436                 TObjString runUsed = Form("%d", GetCurrentRun());
437                 metaData->SetProperty("RunUsed(TObjString)", runUsed.Clone());
438         }
439
440         Bool_t result = kFALSE;
441
442         if (!(AliCDBManager::Instance()->GetStorage(localUri))) {
443                 Log("SHUTTLE", Form("StoreLocally - Cannot activate local %s storage", cdbType));
444         } else {
445                 result = AliCDBManager::Instance()->GetStorage(localUri)
446                                         ->Put(object, id, metaData);
447         }
448
449         if(!result) {
450
451                 Log(fCurrentDetector, Form("StoreLocally - Can't store object <%s>!", id.ToString().Data()));
452         }
453
454         return result;
455 }
456
457 //______________________________________________________________________________________________
458 Bool_t AliShuttle::StoreOCDB()
459 {
460         //
461         // Called when preprocessor ends successfully or when previous storage attempt failed (kStoreError status)
462         // Calls underlying StoreOCDB(const char*) function twice, for OCDB and Reference storage.
463         // Then calls StoreRefFilesToGrid to store reference files. 
464         //
465         
466         if (fTestMode & kErrorGrid)
467         {
468                 Log("SHUTTLE", "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
469                 Log(fCurrentDetector, "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
470                 return kFALSE;
471         }
472         
473         Log("SHUTTLE","Storing OCDB data ...");
474         Bool_t resultCDB = StoreOCDB(fgkMainCDB);
475
476         Log("SHUTTLE","Storing reference data ...");
477         Bool_t resultRef = StoreOCDB(fgkMainRefStorage);
478         
479         Log("SHUTTLE","Storing reference files ...");
480         Bool_t resultRefFiles = StoreRefFilesToGrid();
481         
482         return resultCDB && resultRef && resultRefFiles;
483 }
484
485 //______________________________________________________________________________________________
486 Bool_t AliShuttle::StoreOCDB(const TString& gridURI)
487 {
488         //
489         // Called by StoreOCDB(), performs actual storage to the main OCDB and reference storages (Grid)
490         //
491
492         TObjArray* gridIds=0;
493
494         Bool_t result = kTRUE;
495
496         const char* type = 0;
497         TString localURI;
498         if(gridURI == fgkMainCDB) {
499                 type = "OCDB";
500                 localURI = fgkLocalCDB;
501         } else if(gridURI == fgkMainRefStorage) {
502                 type = "reference";
503                 localURI = fgkLocalRefStorage;
504         } else {
505                 AliError(Form("Invalid storage URI: %s", gridURI.Data()));
506                 return kFALSE;
507         }
508
509         AliCDBManager* man = AliCDBManager::Instance();
510
511         AliCDBStorage *gridSto = man->GetStorage(gridURI);
512         if(!gridSto) {
513                 Log("SHUTTLE",
514                         Form("StoreOCDB - cannot activate main %s storage", type));
515                 return kFALSE;
516         }
517
518         gridIds = gridSto->GetQueryCDBList();
519
520         // get objects previously stored in local CDB
521         AliCDBStorage *localSto = man->GetStorage(localURI);
522         if(!localSto) {
523                 Log("SHUTTLE",
524                         Form("StoreOCDB - cannot activate local %s storage", type));
525                 return kFALSE;
526         }
527         AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*");
528         // Local objects were stored with current run as Grid version!
529         TList* localEntries = localSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun());
530         localEntries->SetOwner(1);
531
532         // loop on local stored objects
533         TIter localIter(localEntries);
534         AliCDBEntry *aLocEntry = 0;
535         while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){
536                 aLocEntry->SetOwner(1);
537                 AliCDBId aLocId = aLocEntry->GetId();
538                 aLocEntry->SetVersion(-1);
539                 aLocEntry->SetSubVersion(-1);
540
541                 // If local object is valid up to infinity we store it only if it is
542                 // the first unprocessed run!
543                 if (aLocId.GetLastRun() == AliCDBRunRange::Infinity() &&
544                         !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
545                 {
546                         Log("SHUTTLE", Form("StoreOCDB - %s: object %s has validity infinite but "
547                                                 "there are previous unprocessed runs!",
548                                                 fCurrentDetector.Data(), aLocId.GetPath().Data()));
549                         continue;
550                 }
551
552                 // loop on Grid valid Id's
553                 Bool_t store = kTRUE;
554                 TIter gridIter(gridIds);
555                 AliCDBId* aGridId = 0;
556                 while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
557                         if(aGridId->GetPath() != aLocId.GetPath()) continue;
558                         // skip all objects valid up to infinity
559                         if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
560                         // if we get here, it means there's already some more recent object stored on Grid!
561                         store = kFALSE;
562                         break;
563                 }
564
565                 // If we get here, the file can be stored!
566                 Bool_t storeOk = gridSto->Put(aLocEntry);
567                 if(!store || storeOk){
568
569                         if (!store)
570                         {
571                                 Log(fCurrentDetector.Data(),
572                                         Form("StoreOCDB - A more recent object already exists in %s storage: <%s>",
573                                                 type, aGridId->ToString().Data()));
574                         } else {
575                                 Log("SHUTTLE",
576                                         Form("StoreOCDB - Object <%s> successfully put into %s storage",
577                                                 aLocId.ToString().Data(), type));
578                                 Log(fCurrentDetector.Data(),
579                                         Form("StoreOCDB - Object <%s> successfully put into %s storage",
580                                                 aLocId.ToString().Data(), type));
581                         }
582
583                         // removing local filename...
584                         TString filename;
585                         localSto->IdToFilename(aLocId, filename);
586                         AliInfo(Form("Removing local file %s", filename.Data()));
587                         RemoveFile(filename.Data());
588                         continue;
589                 } else  {
590                         Log("SHUTTLE",
591                                 Form("StoreOCDB - Grid %s storage of object <%s> failed",
592                                         type, aLocId.ToString().Data()));
593                         Log(fCurrentDetector.Data(),
594                                 Form("StoreOCDB - Grid %s storage of object <%s> failed",
595                                         type, aLocId.ToString().Data()));
596                         result = kFALSE;
597                 }
598         }
599         localEntries->Clear();
600
601         return result;
602 }
603
604 //______________________________________________________________________________________________
605 Bool_t AliShuttle::CleanReferenceStorage(const char* detector)
606 {
607         // clears the directory used to store reference files of a given subdetector
608   
609         AliCDBManager* man = AliCDBManager::Instance();
610         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
611         TString localBaseFolder = sto->GetBaseFolder();
612
613         TString targetDir = GetRefFilePrefix(localBaseFolder.Data(), detector);
614         
615         Log("SHUTTLE", Form("Cleaning %s", targetDir.Data()));
616
617         TString begin;
618         begin.Form("%d_", GetCurrentRun());
619         
620         TSystemDirectory* baseDir = new TSystemDirectory("/", targetDir);
621         if (!baseDir)
622                 return kTRUE;
623                 
624         TList* dirList = baseDir->GetListOfFiles();
625         delete baseDir;
626         
627         if (!dirList) return kTRUE;
628                         
629         if (dirList->GetEntries() < 3) 
630         {
631                 delete dirList;
632                 return kTRUE;
633         }
634                                 
635         Int_t nDirs = 0, nDel = 0;
636         TIter dirIter(dirList);
637         TSystemFile* entry = 0;
638
639         Bool_t success = kTRUE;
640         
641         while ((entry = dynamic_cast<TSystemFile*> (dirIter.Next())))
642         {                                       
643                 if (entry->IsDirectory())
644                         continue;
645                 
646                 TString fileName(entry->GetName());
647                 if (!fileName.BeginsWith(begin))
648                         continue;
649                         
650                 nDirs++;
651                                                 
652                 // delete file
653                 Int_t result = gSystem->Unlink(fileName.Data());
654                 
655                 if (result)
656                 {
657                         Log("SHUTTLE", Form("Could not delete file %s!", fileName.Data()));
658                         success = kFALSE;
659                 } else {
660                         nDel++;
661                 }
662         }
663
664         if(nDirs > 0)
665                 Log("SHUTTLE", Form("CleanReferenceStorage - %d (over %d) reference files in folder %s were deleted.", 
666                         nDel, nDirs, targetDir.Data()));
667
668                 
669         delete dirList;
670         return success;
671
672
673
674
675
676
677   Int_t result = gSystem->GetPathInfo(targetDir, 0, (Long64_t*) 0, 0, 0);
678   if (result == 0)
679   {
680     // delete directory
681     result = gSystem->Exec(Form("rm -r %s", targetDir.Data()));
682     if (result != 0)
683     {  
684       Log("SHUTTLE", Form("StoreReferenceFile - Could not clear directory %s", targetDir.Data()));
685       return kFALSE;
686     }
687   }
688
689   result = gSystem->mkdir(targetDir, kTRUE);
690   if (result != 0)
691   {
692     Log("SHUTTLE", Form("StoreReferenceFile - Error creating base directory %s", targetDir.Data()));
693     return kFALSE;
694   }
695         
696   return kTRUE;
697 }
698
699 //______________________________________________________________________________________________
700 Bool_t AliShuttle::StoreReferenceFile(const char* detector, const char* localFile, const char* gridFileName)
701 {
702         //
703         // Stores reference file directly (without opening it). This function stores the file locally.
704         //
705         // The file is stored under the following location: 
706         // <base folder of local reference storage>/<DET>/<RUN#>_<gridFileName>
707         // where <gridFileName> is the second parameter given to the function
708         // 
709         
710         if (fTestMode & kErrorStorage)
711         {
712                 Log(fCurrentDetector, "StoreReferenceFile - In TESTMODE - Simulating error while storing locally");
713                 return kFALSE;
714         }
715         
716         AliCDBManager* man = AliCDBManager::Instance();
717         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
718         
719         TString localBaseFolder = sto->GetBaseFolder();
720         
721         TString targetDir = GetRefFilePrefix(localBaseFolder.Data(), detector); 
722         
723         //try to open folder, if does not exist
724         void* dir = gSystem->OpenDirectory(targetDir.Data());
725         if (dir == NULL) {
726                 if (gSystem->mkdir(targetDir.Data(), kTRUE)) {
727                         Log("SHUTTLE", Form("Can't open directory <%s>", targetDir.Data()));
728                         return kFALSE;
729                 }
730
731         } else {
732                 gSystem->FreeDirectory(dir);
733         }
734
735         TString target;
736         target.Form("%s/%d_%s", targetDir.Data(), GetCurrentRun(), gridFileName);
737         
738         Int_t result = gSystem->GetPathInfo(localFile, 0, (Long64_t*) 0, 0, 0);
739         if (result)
740         {
741                 Log("SHUTTLE", Form("StoreReferenceFile - %s does not exist", localFile));
742                 return kFALSE;
743         }
744
745         result = gSystem->CopyFile(localFile, target);
746
747         if (result == 0)
748         {
749                 Log("SHUTTLE", Form("StoreReferenceFile - File %s stored locally to %s", localFile, target.Data()));
750                 return kTRUE;
751         }
752         else
753         {
754                 Log("SHUTTLE", Form("StoreReferenceFile - Could not store file %s to %s!. Error code = %d", 
755                                 localFile, target.Data(), result));
756                 return kFALSE;
757         }       
758 }
759
760 //______________________________________________________________________________________________
761 Bool_t AliShuttle::StoreRefFilesToGrid()
762 {
763         //
764         // Transfers the reference file to the Grid.
765         //
766         // The files are stored under the following location: 
767         // <base folder of reference storage>/<DET>/<RUN#>_<gridFileName>
768         //
769         
770         AliCDBManager* man = AliCDBManager::Instance();
771         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
772         if (!sto)
773                 return kFALSE;
774         TString localBaseFolder = sto->GetBaseFolder();
775                 
776         TString dir = GetRefFilePrefix(localBaseFolder.Data(), fCurrentDetector.Data());
777                 
778         AliCDBStorage* gridSto = man->GetStorage(fgkMainRefStorage);
779         if (!gridSto)
780                 return kFALSE;
781         
782         TString gridBaseFolder = gridSto->GetBaseFolder();
783
784         TString alienDir = GetRefFilePrefix(gridBaseFolder.Data(), fCurrentDetector.Data());
785         
786         TString begin;
787         begin.Form("%d_", GetCurrentRun());
788         
789         TSystemDirectory* baseDir = new TSystemDirectory("/", dir);
790         if (!baseDir)
791                 return kTRUE;
792                 
793         TList* dirList = baseDir->GetListOfFiles();
794         delete baseDir;
795         
796         if (!dirList) return kTRUE;
797                 
798         if (dirList->GetEntries() < 3) 
799         {
800                 delete dirList;
801                 return kTRUE;
802         }
803                         
804         if (!gGrid)
805         { 
806                 Log("SHUTTLE", "Connection to Grid failed: Cannot continue!");
807                 delete dirList;
808                 return kFALSE;
809         }
810         
811         Int_t nDirs = 0, nTransfer = 0;
812         TIter dirIter(dirList);
813         TSystemFile* entry = 0;
814
815         Bool_t success = kTRUE;
816         Bool_t first = kTRUE;
817         
818         while ((entry = dynamic_cast<TSystemFile*> (dirIter.Next())))
819         {                       
820                 if (entry->IsDirectory())
821                         continue;
822                         
823                 TString fileName(entry->GetName());
824                 if (!fileName.BeginsWith(begin))
825                         continue;
826                         
827                 nDirs++;
828                         
829                 if (first)
830                 {
831                         first = kFALSE;
832                         // check that DET folder exists, otherwise create it
833                         TGridResult* result = gGrid->Ls(alienDir.Data(), "a");
834                         
835                         if (!result)
836                         {
837                                 delete dirList;
838                                 return kFALSE;
839                         }
840                         
841                         if (!result->GetFileName(1)) // TODO: It looks like element 0 is always 0!!
842                         {
843                                 if (!gGrid->Mkdir(alienDir.Data(),"",0))
844                                 {
845                                         Log("SHUTTLE", Form("StoreRefFilesToGrid - Cannot create directory %s",
846                                                         alienDir.Data()));
847                                         delete dirList;
848                                         return kFALSE;
849                                 } else {
850                                         Log("SHUTTLE",Form("Folder %s created", alienDir.Data()));
851                                 }
852                                 
853                         } else {
854                                         Log("SHUTTLE",Form("Folder %s found", alienDir.Data()));
855                         }
856                 }
857                         
858                 TString fullLocalPath;
859                 fullLocalPath.Form("%s/%s", dir.Data(), fileName.Data());
860                 
861                 TString fullGridPath;
862                 fullGridPath.Form("alien://%s/%s", alienDir.Data(), fileName.Data());
863
864                 TFileMerger fileMerger;
865                 Bool_t result = TFile::Cp(fullLocalPath, fullGridPath);
866                 
867                 if (result)
868                 {
869                         Log("SHUTTLE", Form("StoreRefFilesToGrid - Copying local file %s to %s succeeded!", fullLocalPath.Data(), fullGridPath.Data()));
870                         RemoveFile(fullLocalPath);
871                         nTransfer++;
872                 }
873                 else
874                 {
875                         Log("SHUTTLE", Form("StoreRefFilesToGrid - Copying local file %s to %s FAILED!", fullLocalPath.Data(), fullGridPath.Data()));
876                         success = kFALSE;
877                 }
878         }
879
880         Log("SHUTTLE", Form("StoreRefFilesToGrid - %d (over %d) reference files in folder %s copied to Grid.", nTransfer, nDirs, dir.Data()));
881
882                 
883         delete dirList;
884         return success;
885 }
886
887 //______________________________________________________________________________________________
888 const char* AliShuttle::GetRefFilePrefix(const char* base, const char* detector)
889 {
890         //
891         // Get folder name of reference files 
892         //
893
894         TString offDetStr(GetOfflineDetName(detector));
895         TString dir;
896         if (offDetStr == "ITS" || offDetStr == "MUON" || offDetStr == "PHOS")
897         {
898                 dir.Form("%s/%s/%s", base, offDetStr.Data(), detector);
899         } else {
900                 dir.Form("%s/%s", base, offDetStr.Data());
901         }
902         
903         return dir.Data();
904         
905
906 }
907 //______________________________________________________________________________________________
908 void AliShuttle::CleanLocalStorage(const TString& uri)
909 {
910         //
911         // Called in case the preprocessor is declared failed. Remove remaining objects from the local storages.
912         //
913
914         const char* type = 0;
915         if(uri == fgkLocalCDB) {
916                 type = "OCDB";
917         } else if(uri == fgkLocalRefStorage) {
918                 type = "Reference";
919         } else {
920                 AliError(Form("Invalid storage URI: %s", uri.Data()));
921                 return;
922         }
923
924         AliCDBManager* man = AliCDBManager::Instance();
925
926         // open local storage
927         AliCDBStorage *localSto = man->GetStorage(uri);
928         if(!localSto) {
929                 Log("SHUTTLE",
930                         Form("CleanLocalStorage - cannot activate local %s storage", type));
931                 return;
932         }
933
934         TString filename(Form("%s/%s/*/Run*_v%d_s*.root",
935                 localSto->GetBaseFolder().Data(), GetOfflineDetName(fCurrentDetector.Data()), GetCurrentRun()));
936
937         AliInfo(Form("filename = %s", filename.Data()));
938
939         AliInfo(Form("Removing remaining local files from run %d and detector %s ...",
940                 GetCurrentRun(), fCurrentDetector.Data()));
941
942         RemoveFile(filename.Data());
943
944 }
945
946 //______________________________________________________________________________________________
947 void AliShuttle::RemoveFile(const char* filename)
948 {
949         //
950         // removes local file
951         //
952
953         TString command(Form("rm -f %s", filename));
954
955         Int_t result = gSystem->Exec(command.Data());
956         if(result != 0)
957         {
958                 Log("SHUTTLE", Form("RemoveFile - %s: Cannot remove file %s!",
959                         fCurrentDetector.Data(), filename));
960         }
961 }
962
963 //______________________________________________________________________________________________
964 AliShuttleStatus* AliShuttle::ReadShuttleStatus()
965 {
966         //
967         // Reads the AliShuttleStatus from the CDB
968         //
969
970         if (fStatusEntry){
971                 delete fStatusEntry;
972                 fStatusEntry = 0;
973         }
974
975         fStatusEntry = AliCDBManager::Instance()->GetStorage(GetLocalCDB())
976                 ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
977
978         if (!fStatusEntry) return 0;
979         fStatusEntry->SetOwner(1);
980
981         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
982         if (!status) {
983                 AliError("Invalid object stored to CDB!");
984                 return 0;
985         }
986
987         return status;
988 }
989
990 //______________________________________________________________________________________________
991 Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status)
992 {
993         //
994         // writes the status for one subdetector
995         //
996
997         if (fStatusEntry){
998                 delete fStatusEntry;
999                 fStatusEntry = 0;
1000         }
1001
1002         Int_t run = GetCurrentRun();
1003
1004         AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
1005
1006         fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
1007         fStatusEntry->SetOwner(1);
1008
1009         UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
1010
1011         if (!result) {
1012                 Log("SHUTTLE", Form("WriteShuttleStatus - Failed for %s, run %d",
1013                                                 fCurrentDetector.Data(), run));
1014                 return kFALSE;
1015         }
1016         
1017         SendMLInfo();
1018
1019         return kTRUE;
1020 }
1021
1022 //______________________________________________________________________________________________
1023 void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount)
1024 {
1025         //
1026         // changes the AliShuttleStatus for the given detector and run to the given status
1027         //
1028
1029         if (!fStatusEntry){
1030                 AliError("UNEXPECTED: fStatusEntry empty");
1031                 return;
1032         }
1033
1034         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1035
1036         if (!status){
1037                 Log("SHUTTLE", "UNEXPECTED: status could not be read from current CDB entry");
1038                 return;
1039         }
1040
1041         TString actionStr = Form("UpdateShuttleStatus - %s: Changing state from %s to %s",
1042                                 fCurrentDetector.Data(),
1043                                 status->GetStatusName(),
1044                                 status->GetStatusName(newStatus));
1045         Log("SHUTTLE", actionStr);
1046         SetLastAction(actionStr);
1047
1048         status->SetStatus(newStatus);
1049         if (increaseCount) status->IncreaseCount();
1050
1051         AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
1052
1053         SendMLInfo();
1054 }
1055
1056 //______________________________________________________________________________________________
1057 void AliShuttle::SendMLInfo()
1058 {
1059         //
1060         // sends ML information about the current status of the current detector being processed
1061         //
1062         
1063         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1064         
1065         if (!status){
1066                 Log("SHUTTLE", "SendMLInfo - UNEXPECTED: status could not be read from current CDB entry");
1067                 return;
1068         }
1069         
1070         TMonaLisaText  mlStatus(Form("%s_status", fCurrentDetector.Data()), status->GetStatusName());
1071         TMonaLisaValue mlRetryCount(Form("%s_count", fCurrentDetector.Data()), status->GetCount());
1072
1073         TList mlList;
1074         mlList.Add(&mlStatus);
1075         mlList.Add(&mlRetryCount);
1076
1077         fMonaLisa->SendParameters(&mlList);
1078 }
1079
1080 //______________________________________________________________________________________________
1081 Bool_t AliShuttle::ContinueProcessing()
1082 {
1083         // this function reads the AliShuttleStatus information from CDB and
1084         // checks if the processing should be continued
1085         // if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
1086
1087         if (!fConfig->HostProcessDetector(fCurrentDetector)) return kFALSE;
1088
1089         AliPreprocessor* aPreprocessor =
1090                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
1091         if (!aPreprocessor)
1092         {
1093                 AliInfo(Form("%s: no preprocessor registered", fCurrentDetector.Data()));
1094                 return kFALSE;
1095         }
1096
1097         AliShuttleLogbookEntry::Status entryStatus =
1098                 fLogbookEntry->GetDetectorStatus(fCurrentDetector);
1099
1100         if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
1101                 AliInfo(Form("ContinueProcessing - %s is %s",
1102                                 fCurrentDetector.Data(),
1103                                 fLogbookEntry->GetDetectorStatusName(entryStatus)));
1104                 return kFALSE;
1105         }
1106
1107         // if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state
1108
1109         // check if current run is first unprocessed run for current detector
1110         if (fConfig->StrictRunOrder(fCurrentDetector) &&
1111                 !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
1112         {
1113                 if (fTestMode == kNone)
1114                 {
1115                         Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering but this is not the first unprocessed run!"));
1116                         return kFALSE;
1117                 }
1118                 else
1119                 {
1120                         Log("SHUTTLE", Form("ContinueProcessing - In TESTMODE - Although %s requires strict run ordering and this is not the first unprocessed run, the SHUTTLE continues"));
1121                 }
1122         }
1123
1124         AliShuttleStatus* status = ReadShuttleStatus();
1125         if (!status) {
1126                 // first time
1127                 Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
1128                                 fCurrentDetector.Data()));
1129                 status = new AliShuttleStatus(AliShuttleStatus::kStarted);
1130                 return WriteShuttleStatus(status);
1131         }
1132
1133         // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
1134         // If it happens it may mean Logbook updating failed... let's do it now!
1135         if (status->GetStatus() == AliShuttleStatus::kDone ||
1136             status->GetStatus() == AliShuttleStatus::kFailed){
1137                 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
1138                                         fCurrentDetector.Data(),
1139                                         status->GetStatusName(status->GetStatus())));
1140                 UpdateShuttleLogbook(fCurrentDetector.Data(),
1141                                         status->GetStatusName(status->GetStatus()));
1142                 return kFALSE;
1143         }
1144
1145         if (status->GetStatus() == AliShuttleStatus::kStoreError) {
1146                 Log("SHUTTLE",
1147                         Form("ContinueProcessing - %s: Grid storage of one or more objects failed. Trying again now",
1148                                 fCurrentDetector.Data()));
1149                 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
1150                 if (StoreOCDB()){
1151                         Log("SHUTTLE", Form("ContinueProcessing - %s: all objects successfully stored into main storage",
1152                                 fCurrentDetector.Data()));
1153                         UpdateShuttleStatus(AliShuttleStatus::kDone);
1154                         UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE");
1155                 } else {
1156                         Log("SHUTTLE",
1157                                 Form("ContinueProcessing - %s: Grid storage failed again",
1158                                         fCurrentDetector.Data()));
1159                         UpdateShuttleStatus(AliShuttleStatus::kStoreError);
1160                 }
1161                 return kFALSE;
1162         }
1163
1164         // if we get here, there is a restart
1165         Bool_t cont = kFALSE;
1166
1167         // abort conditions
1168         if (status->GetCount() >= fConfig->GetMaxRetries()) {
1169                 Log("SHUTTLE", Form("ContinueProcessing - %s failed %d times in status %s - "
1170                                 "Updating Shuttle Logbook", fCurrentDetector.Data(),
1171                                 status->GetCount(), status->GetStatusName()));
1172                 UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
1173                 UpdateShuttleStatus(AliShuttleStatus::kFailed);
1174
1175                 // there may still be objects in local OCDB and reference storage
1176                 // and FXS databases may be not updated: do it now!
1177                 
1178                 // TODO Currently disabled, we want to keep files in case of failure!
1179                 // CleanLocalStorage(fgkLocalCDB);
1180                 // CleanLocalStorage(fgkLocalRefStorage);
1181                 // UpdateTableFailCase();
1182                 
1183                 // Send mail to detector expert!
1184                 AliInfo(Form("Sending mail to %s expert...", fCurrentDetector.Data()));
1185                 if (!SendMail())
1186                         Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert",
1187                                         fCurrentDetector.Data()));
1188
1189         } else {
1190                 Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. "
1191                                 "Aborted before with %s. Retry number %d.", fCurrentDetector.Data(),
1192                                 status->GetStatusName(), status->GetCount()));
1193                 Bool_t increaseCount = kTRUE;
1194                 if (status->GetStatus() == AliShuttleStatus::kDCSError || status->GetStatus() == AliShuttleStatus::kDCSStarted)
1195                         increaseCount = kFALSE;
1196                 UpdateShuttleStatus(AliShuttleStatus::kStarted, increaseCount);
1197                 cont = kTRUE;
1198         }
1199
1200         return cont;
1201 }
1202
1203 //______________________________________________________________________________________________
1204 Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
1205 {
1206         //
1207         // Makes data retrieval for all detectors in the configuration.
1208         // entry: Shuttle logbook entry, contains run paramenters and status of detectors
1209         // (Unprocessed, Inactive, Failed or Done).
1210         // Returns kFALSE in case of error occured and kTRUE otherwise
1211         //
1212
1213         if (!entry) return kFALSE;
1214
1215         fLogbookEntry = entry;
1216
1217         AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^* \n",
1218                                         GetCurrentRun()));
1219
1220         // create ML instance that monitors this run
1221         fMonaLisa = new TMonaLisaWriter(Form("%d", GetCurrentRun()), "SHUTTLE", "aliendb1.cern.ch");
1222         // disable monitoring of other parameters that come e.g. from TFile
1223         gMonitoringWriter = 0;
1224
1225         // Send the information to ML
1226         TMonaLisaText  mlStatus("SHUTTLE_status", "Processing");
1227         TMonaLisaText  mlRunType("SHUTTLE_runtype", Form("%s (%s)", entry->GetRunType(), entry->GetRunParameter("log")));
1228
1229         TList mlList;
1230         mlList.Add(&mlStatus);
1231         mlList.Add(&mlRunType);
1232
1233         fMonaLisa->SendParameters(&mlList);
1234
1235         if (fLogbookEntry->IsDone())
1236         {
1237                 Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook");
1238                 UpdateShuttleLogbook("shuttle_done");
1239                 fLogbookEntry = 0;
1240                 return kTRUE;
1241         }
1242
1243         // read test mode if flag is set
1244         if (fReadTestMode)
1245         {
1246                 fTestMode = kNone;
1247                 TString logEntry(entry->GetRunParameter("log"));
1248                 //printf("log entry = %s\n", logEntry.Data());
1249                 TString searchStr("Testmode: ");
1250                 Int_t pos = logEntry.Index(searchStr.Data());
1251                 //printf("%d\n", pos);
1252                 if (pos >= 0)
1253                 {
1254                         TSubString subStr = logEntry(pos + searchStr.Length(), logEntry.Length());
1255                         //printf("%s\n", subStr.String().Data());
1256                         TString newStr(subStr.Data());
1257                         TObjArray* token = newStr.Tokenize(' ');
1258                         if (token)
1259                         {
1260                                 //token->Print();
1261                                 TObjString* tmpStr = dynamic_cast<TObjString*> (token->First());
1262                                 if (tmpStr)
1263                                 {
1264                                         Int_t testMode = tmpStr->String().Atoi();
1265                                         if (testMode > 0)
1266                                         {
1267                                                 Log("SHUTTLE", Form("Enabling test mode %d", testMode));
1268                                                 SetTestMode((TestMode) testMode);
1269                                         }
1270                                 }
1271                                 delete token;          
1272                         }
1273                 }
1274         }
1275         
1276         Log("SHUTTLE", Form("The test mode flag is %d", (Int_t) fTestMode));
1277         
1278         fLogbookEntry->Print("all");
1279
1280         // Initialization
1281         Bool_t hasError = kFALSE;
1282
1283         AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
1284         if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
1285         AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
1286         if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
1287
1288         // Loop on detectors in the configuration
1289         TIter iter(fConfig->GetDetectors());
1290         TObjString* aDetector = 0;
1291
1292         while ((aDetector = (TObjString*) iter.Next()))
1293         {
1294                 fCurrentDetector = aDetector->String();
1295
1296                 if (ContinueProcessing() == kFALSE) continue;
1297
1298                 AliInfo(Form("\n\n \t\t\t****** run %d - %s: START  ******",
1299                                                 GetCurrentRun(), aDetector->GetName()));
1300
1301                 for(Int_t iSys=0;iSys<3;iSys++) fFXSCalled[iSys]=kFALSE;
1302
1303                 Log(fCurrentDetector.Data(), "Starting processing");
1304
1305                 Int_t pid = fork();
1306
1307                 if (pid < 0)
1308                 {
1309                         Log("SHUTTLE", "ERROR: Forking failed");
1310                 }
1311                 else if (pid > 0)
1312                 {
1313                         // parent
1314                         AliInfo(Form("In parent process of %d - %s: Starting monitoring",
1315                                                         GetCurrentRun(), aDetector->GetName()));
1316
1317                         Long_t begin = time(0);
1318
1319                         int status; // to be used with waitpid, on purpose an int (not Int_t)!
1320                         while (waitpid(pid, &status, WNOHANG) == 0)
1321                         {
1322                                 Long_t expiredTime = time(0) - begin;
1323
1324                                 if (expiredTime > fConfig->GetPPTimeOut())
1325                                 {
1326                                         TString tmp;
1327                                         tmp.Form("Process of %s time out. Run time: %d seconds. Killing...",
1328                                                                 fCurrentDetector.Data(), expiredTime);
1329                                         Log("SHUTTLE", tmp);
1330                                         Log(fCurrentDetector, tmp);
1331
1332                                         kill(pid, 9);
1333
1334                                         UpdateShuttleStatus(AliShuttleStatus::kPPTimeOut);
1335                                         hasError = kTRUE;
1336
1337                                         gSystem->Sleep(1000);
1338                                 }
1339                                 else
1340                                 {
1341                                         gSystem->Sleep(1000);
1342                                         
1343                                         TString checkStr;
1344                                         checkStr.Form("ps -o vsize --pid %d | tail -n 1", pid);
1345                                         FILE* pipe = gSystem->OpenPipe(checkStr, "r");
1346                                         if (!pipe)
1347                                         {
1348                                                 Log("SHUTTLE", Form("Error: Could not open pipe to %s", checkStr.Data()));
1349                                                 continue;
1350                                         }
1351                                                 
1352                                         char buffer[100];
1353                                         if (!fgets(buffer, 100, pipe))
1354                                         {
1355                                                 Log("SHUTTLE", "Error: ps did not return anything");
1356                                                 gSystem->ClosePipe(pipe);
1357                                                 continue;
1358                                         }
1359                                         gSystem->ClosePipe(pipe);
1360                                         
1361                                         //Log("SHUTTLE", Form("ps returned %s", buffer));
1362                                         
1363                                         Int_t mem = 0;
1364                                         if ((sscanf(buffer, "%d\n", &mem) != 1) || !mem)
1365                                         {
1366                                                 Log("SHUTTLE", "Error: Could not parse output of ps");
1367                                                 continue;
1368                                         }
1369                                         
1370                                         if (expiredTime % 60 == 0)
1371                                                 Log("SHUTTLE", Form("%s: Checking process. Run time: %d seconds - Memory consumption: %d KB",
1372                                                                 fCurrentDetector.Data(), expiredTime, mem));
1373                                         
1374                                         if (mem > fConfig->GetPPMaxMem())
1375                                         {
1376                                                 TString tmp;
1377                                                 tmp.Form("Process exceeds maximum allowed memory (%d KB > %d KB). Killing...",
1378                                                         mem, fConfig->GetPPMaxMem());
1379                                                 Log("SHUTTLE", tmp);
1380                                                 Log(fCurrentDetector, tmp);
1381         
1382                                                 kill(pid, 9);
1383         
1384                                                 UpdateShuttleStatus(AliShuttleStatus::kPPOutOfMemory);
1385                                                 hasError = kTRUE;
1386         
1387                                                 gSystem->Sleep(1000);
1388                                         }
1389                                 }
1390                         }
1391
1392                         AliInfo(Form("In parent process of %d - %s: Client has terminated.",
1393                                                                 GetCurrentRun(), aDetector->GetName()));
1394
1395                         if (WIFEXITED(status))
1396                         {
1397                                 Int_t returnCode = WEXITSTATUS(status);
1398
1399                                 Log("SHUTTLE", Form("%s: the return code is %d", fCurrentDetector.Data(),
1400                                                                                 returnCode));
1401
1402                                 if (returnCode == 0) hasError = kTRUE;
1403                         }
1404                 }
1405                 else if (pid == 0)
1406                 {
1407                         // client
1408                         AliInfo(Form("In client process of %d - %s", GetCurrentRun(), aDetector->GetName()));
1409
1410                         AliInfo("Redirecting output...");
1411
1412                         if ((freopen(GetLogFileName(fCurrentDetector), "a", stdout)) == 0)
1413                         {
1414                                 Log("SHUTTLE", "Could not freopen stdout");
1415                         }
1416                         else
1417                         {
1418                                 fOutputRedirected = kTRUE;
1419                                 if ((dup2(fileno(stdout), fileno(stderr))) < 0)
1420                                         Log("SHUTTLE", "Could not redirect stderr");
1421                                 
1422                         }
1423                         
1424                         TString wd = gSystem->WorkingDirectory();
1425                         TString tmpDir = Form("%s/%s_process",GetShuttleTempDir(),fCurrentDetector.Data());
1426                         
1427                         gSystem->mkdir(tmpDir.Data());
1428                         gSystem->ChangeDirectory(tmpDir.Data());
1429                         
1430                         Bool_t success = ProcessCurrentDetector();
1431                         
1432                         gSystem->ChangeDirectory(wd.Data());
1433                         
1434                         gSystem->Exec(Form("rm -rf %s",tmpDir.Data()));
1435                         
1436                         if (success) // Preprocessor finished successfully!
1437                         { 
1438                                 // Update time_processed field in FXS DB
1439                                 if (UpdateTable() == kFALSE)
1440                                         Log("SHUTTLE", Form("Process - %s: Could not update FXS databases!", 
1441                                                         fCurrentDetector.Data()));
1442
1443                                 // Transfer the data from local storage to main storage (Grid)
1444                                 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
1445                                 if (StoreOCDB() == kFALSE)
1446                                 {
1447                                         AliInfo(Form("\n \t\t\t****** run %d - %s: STORAGE ERROR ****** \n\n",
1448                                                         GetCurrentRun(), aDetector->GetName()));
1449                                         UpdateShuttleStatus(AliShuttleStatus::kStoreError);
1450                                         success = kFALSE;
1451                                 } else {
1452                                         AliInfo(Form("\n \t\t\t****** run %d - %s: DONE ****** \n\n",
1453                                                         GetCurrentRun(), aDetector->GetName()));
1454                                         UpdateShuttleStatus(AliShuttleStatus::kDone);
1455                                         UpdateShuttleLogbook(fCurrentDetector, "DONE");
1456                                 }
1457                         }
1458
1459                         for (UInt_t iSys=0; iSys<3; iSys++)
1460                         {
1461                                 if (fFXSCalled[iSys]) fFXSlist[iSys].Clear();
1462                         }
1463
1464                         AliInfo(Form("Client process of %d - %s is exiting now with %d.",
1465                                                         GetCurrentRun(), aDetector->GetName(), success));
1466
1467                         // the client exits here
1468                         gSystem->Exit(success);
1469
1470                         AliError("We should never get here!!!");
1471                 }
1472         }
1473
1474         AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^* \n",
1475                                                         GetCurrentRun()));
1476
1477         //check if shuttle is done for this run, if so update logbook
1478         TObjArray checkEntryArray;
1479         checkEntryArray.SetOwner(1);
1480         TString whereClause = Form("where run=%d", GetCurrentRun());
1481         if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || checkEntryArray.GetEntries() == 0) {
1482                 Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!",
1483                                                 GetCurrentRun()));
1484                 return hasError == kFALSE;
1485         }
1486
1487         AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*>
1488                                                 (checkEntryArray.At(0));
1489
1490         if (checkEntry)
1491         {
1492                 if (checkEntry->IsDone())
1493                 {
1494                         Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook");
1495                         UpdateShuttleLogbook("shuttle_done");
1496                 }
1497                 else
1498                 {
1499                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1500                         {
1501                                 if (checkEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
1502                                 {
1503                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
1504                                                         checkEntry->GetRun(), GetDetName(iDet)));
1505                                         fFirstUnprocessed[iDet] = kFALSE;
1506                                 }
1507                         }
1508                 }
1509         }
1510
1511         // remove ML instance
1512         delete fMonaLisa;
1513         fMonaLisa = 0;
1514
1515         fLogbookEntry = 0;
1516
1517         return hasError == kFALSE;
1518 }
1519
1520 //______________________________________________________________________________________________
1521 Bool_t AliShuttle::ProcessCurrentDetector()
1522 {
1523         //
1524         // Makes data retrieval just for a specific detector (fCurrentDetector).
1525         // Threre should be a configuration for this detector.
1526
1527         AliInfo(Form("Retrieving values for %s, run %d", fCurrentDetector.Data(), GetCurrentRun()));
1528
1529         if (!CleanReferenceStorage(fCurrentDetector.Data()))
1530                 return kFALSE;
1531
1532         TMap* dcsMap = 0;
1533
1534         // call preprocessor
1535         AliPreprocessor* aPreprocessor =
1536                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
1537
1538         aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
1539
1540         Bool_t processDCS = aPreprocessor->ProcessDCS();
1541
1542         if (!processDCS)
1543         {
1544                 Log(fCurrentDetector, "The preprocessor requested to skip the retrieval of DCS values");
1545         }
1546         else if (fTestMode & kSkipDCS)
1547         {
1548                 Log(fCurrentDetector, "In TESTMODE - Skipping DCS processing!");
1549         } 
1550         else if (fTestMode & kErrorDCS)
1551         {
1552                 Log(fCurrentDetector, "In TESTMODE - Simulating DCS error");
1553                 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
1554                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1555                 return kFALSE;
1556         } else {
1557
1558                 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
1559
1560                 TString host(fConfig->GetDCSHost(fCurrentDetector));
1561                 Int_t port = fConfig->GetDCSPort(fCurrentDetector);
1562
1563                 if (fConfig->GetDCSAliases(fCurrentDetector)->GetEntries() > 0)
1564                 {
1565                         dcsMap = GetValueSet(host, port, fConfig->GetDCSAliases(fCurrentDetector), kAlias);
1566                         if (!dcsMap)
1567                         {
1568                                 Log(fCurrentDetector, "ProcessCurrentDetector - Error while retrieving DCS aliases");
1569                                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1570                                 return kFALSE;
1571                         }
1572                 }
1573                 
1574                 if (fConfig->GetDCSDataPoints(fCurrentDetector)->GetEntries() > 0)
1575                 {
1576                         TMap* dcsMap2 = GetValueSet(host, port, fConfig->GetDCSDataPoints(fCurrentDetector), kDP);
1577                         if (!dcsMap2)
1578                         {
1579                                 Log(fCurrentDetector, "ProcessCurrentDetector - Error while retrieving DCS data points");
1580                                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1581                                 if (dcsMap)
1582                                         delete dcsMap;
1583                                 return kFALSE;
1584                         }
1585                         
1586                         if (!dcsMap)
1587                         {
1588                                 dcsMap = dcsMap2;
1589                         }
1590                         else // merge
1591                         {
1592                                 TIter iter(dcsMap2);
1593                                 TObjString* key = 0;
1594                                 while ((key = (TObjString*) iter.Next()))
1595                                         dcsMap->Add(key, dcsMap2->GetValue(key->String()));
1596                                         
1597                                 dcsMap2->SetOwner(kFALSE);
1598                                 delete dcsMap2;
1599                         }
1600                 }
1601                 
1602         }
1603
1604         // still no map?
1605         if (!dcsMap)
1606                 dcsMap = new TMap;
1607         
1608         // DCS Archive DB processing successful. Call Preprocessor!
1609         UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
1610
1611         UInt_t returnValue = aPreprocessor->Process(dcsMap);
1612
1613         if (returnValue > 0) // Preprocessor error!
1614         {
1615                 Log(fCurrentDetector, Form("Preprocessor failed. Process returned %d.", returnValue));
1616                 UpdateShuttleStatus(AliShuttleStatus::kPPError);
1617                 dcsMap->DeleteAll();
1618                 delete dcsMap;
1619                 return kFALSE;
1620         }
1621         
1622         // preprocessor ok!
1623         UpdateShuttleStatus(AliShuttleStatus::kPPDone);
1624         Log(fCurrentDetector, Form("ProcessCurrentDetector - %s preprocessor returned success",
1625                                 fCurrentDetector.Data()));
1626
1627         dcsMap->DeleteAll();
1628         delete dcsMap;
1629
1630         return kTRUE;
1631 }
1632
1633 //______________________________________________________________________________________________
1634 Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
1635                 TObjArray& entries)
1636 {
1637         // Query DAQ's Shuttle logbook and fills detector status object.
1638         // Call QueryRunParameters to query DAQ logbook for run parameters.
1639         //
1640
1641         entries.SetOwner(1);
1642
1643         // check connection, in case connect
1644         if(!Connect(3)) return kFALSE;
1645
1646         TString sqlQuery;
1647         sqlQuery = Form("select * from %s %s order by run", fConfig->GetShuttlelbTable(), whereClause);
1648
1649         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
1650         if (!aResult) {
1651                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
1652                 return kFALSE;
1653         }
1654
1655         AliDebug(2,Form("Query = %s", sqlQuery.Data()));
1656
1657         if(aResult->GetRowCount() == 0) {
1658                 AliInfo("No entries in Shuttle Logbook match request");
1659                 delete aResult;
1660                 return kTRUE;
1661         }
1662
1663         // TODO Check field count!
1664         const UInt_t nCols = 23;
1665         if (aResult->GetFieldCount() != (Int_t) nCols) {
1666                 AliError("Invalid SQL result field number!");
1667                 delete aResult;
1668                 return kFALSE;
1669         }
1670
1671         TSQLRow* aRow;
1672         while ((aRow = aResult->Next())) {
1673                 TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
1674                 Int_t run = runString.Atoi();
1675
1676                 AliShuttleLogbookEntry *entry = QueryRunParameters(run);
1677                 if (!entry)
1678                         continue;
1679
1680                 // loop on detectors
1681                 for(UInt_t ii = 0; ii < nCols; ii++)
1682                         entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii));
1683
1684                 entries.AddLast(entry);
1685                 delete aRow;
1686         }
1687
1688         delete aResult;
1689         return kTRUE;
1690 }
1691
1692 //______________________________________________________________________________________________
1693 AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
1694 {
1695         //
1696         // Retrieve run parameters written in the DAQ logbook and sets them into AliShuttleLogbookEntry object
1697         //
1698
1699         // check connection, in case connect
1700         if (!Connect(3))
1701                 return 0;
1702
1703         TString sqlQuery;
1704         sqlQuery.Form("select * from %s where run=%d", fConfig->GetDAQlbTable(), run);
1705
1706         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
1707         if (!aResult) {
1708                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
1709                 return 0;
1710         }
1711
1712         if (aResult->GetRowCount() == 0) {
1713                 Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run));
1714                 delete aResult;
1715                 return 0;
1716         }
1717
1718         if (aResult->GetRowCount() > 1) {
1719                 AliError(Form("More than one entry in DAQ Logbook for run %d. Skipping", run));
1720                 delete aResult;
1721                 return 0;
1722         }
1723
1724         TSQLRow* aRow = aResult->Next();
1725         if (!aRow)
1726         {
1727                 AliError(Form("Could not retrieve row for run %d. Skipping", run));
1728                 delete aResult;
1729                 return 0;
1730         }
1731
1732         AliShuttleLogbookEntry* entry = new AliShuttleLogbookEntry(run);
1733
1734         for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
1735                 entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
1736
1737         UInt_t startTime = entry->GetStartTime();
1738         UInt_t endTime = entry->GetEndTime();
1739
1740         if (!startTime || !endTime || startTime > endTime) {
1741                 Log("SHUTTLE",
1742                         Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d",
1743                                 run, startTime, endTime));
1744                 delete entry;
1745                 delete aRow;
1746                 delete aResult;
1747                 return 0;
1748         }
1749
1750         delete aRow;
1751         delete aResult;
1752
1753         return entry;
1754 }
1755
1756 //______________________________________________________________________________________________
1757 Bool_t AliShuttle::GetValueSet(const char* host, Int_t port, const char* entry,
1758                                 TObjArray* valueSet, DCSType type)
1759 {
1760         // Retrieve all "entry" data points from the DCS server
1761         // host, port: TSocket connection parameters
1762         // entry: name of the alias or data point
1763         // valueSet: array of retrieved AliDCSValue's
1764         // type: kAlias or kDP
1765
1766         AliDCSClient client(host, port, fTimeout, fRetries);
1767         if (!client.IsConnected())
1768         {
1769                 return kFALSE;
1770         }
1771
1772         Int_t result=0;
1773
1774         if (type == kAlias)
1775         {
1776                 result = client.GetAliasValues(entry,
1777                         GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1778         } else
1779         if (type == kDP)
1780         {
1781                 result = client.GetDPValues(entry,
1782                         GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1783         }
1784
1785         if (result < 0)
1786         {
1787                 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get '%s'! Reason: %s",
1788                         entry, AliDCSClient::GetErrorString(result)));
1789
1790                 if (result == AliDCSClient::fgkServerError)
1791                 {
1792                         Log(fCurrentDetector.Data(), Form("GetValueSet - Server error: %s",
1793                                 client.GetServerError().Data()));
1794                 }
1795
1796                 return kFALSE;
1797         }
1798
1799         return kTRUE;
1800 }
1801
1802 //______________________________________________________________________________________________
1803 TMap* AliShuttle::GetValueSet(const char* host, Int_t port, const TSeqCollection* entries,
1804                               DCSType type)
1805 {
1806         // Retrieve all "entry" data points from the DCS server
1807         // host, port: TSocket connection parameters
1808         // entries: list of name of the alias or data point
1809         // type: kAlias or kDP
1810         // returns TMap of values, 0 when failure
1811
1812         const Int_t kSplit = 100; // maximum number of DPs at a time
1813         
1814         Int_t totalEntries = entries->GetEntries();
1815         
1816         TMap* result = 0;
1817         
1818         for (Int_t index=0; index < totalEntries; index += kSplit)
1819         {
1820                 Int_t endIndex = index + kSplit;
1821         
1822                 AliDCSClient client(host, port, fTimeout, fRetries);
1823                 if (!client.IsConnected())
1824                         return 0;
1825
1826                 TMap* partialResult = 0;
1827
1828                 if (type == kAlias)
1829                 {
1830                         partialResult = client.GetAliasValues(entries, GetCurrentStartTime(), 
1831                                 GetCurrentEndTime(), index, endIndex);
1832                 } 
1833                 else if (type == kDP)
1834                 {
1835                         partialResult = client.GetDPValues(entries, GetCurrentStartTime(), 
1836                                 GetCurrentEndTime(), index, endIndex);
1837                 }
1838
1839                 if (partialResult == 0)
1840                 {
1841                         Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get entries (%d...%d)! Reason: %s",
1842                                 index, endIndex, client.GetServerError().Data()));
1843         
1844                         if (result)
1845                                 delete result;
1846                                 
1847                         return 0;
1848                 }
1849                 
1850                 AliInfo(Form("Retrieved entries %d..%d (total %d); E.g. %s has %d values collected",
1851                                         index, endIndex, totalEntries, entries->At(index)->GetName(), ((TObjArray*)
1852                                         partialResult->GetValue(entries->At(index)->GetName()))->GetEntriesFast()));
1853                 
1854                 if (!result)
1855                 {
1856                         result = partialResult;
1857                 }
1858                 else
1859                 {               
1860                         TIter iter(partialResult);
1861                         TObjString* key = 0;
1862                         while ((key = (TObjString*) iter.Next()))
1863                                 result->Add(key, partialResult->GetValue(key->String()));
1864                                 
1865                         partialResult->SetOwner(kFALSE);
1866                         delete partialResult;
1867                 }
1868         
1869         }
1870
1871         return result;
1872 }
1873 //______________________________________________________________________________________________
1874 const char* AliShuttle::GetFile(Int_t system, const char* detector,
1875                 const char* id, const char* source)
1876 {
1877         // Get calibration file from file exchange servers
1878         // First queris the FXS database for the file name, using the run, detector, id and source info
1879         // then calls RetrieveFile(filename) for actual copy to local disk
1880         // run: current run being processed (given by Logbook entry fLogbookEntry)
1881         // detector: the Preprocessor name
1882         // id: provided as a parameter by the Preprocessor
1883         // source: provided by the Preprocessor through GetFileSources function
1884
1885         // check if test mode should simulate a FXS error
1886         if (fTestMode & kErrorFXSFiles)
1887         {
1888                 Log(detector, Form("GetFile - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
1889                 return 0;
1890         }
1891         
1892         // check connection, in case connect
1893         if (!Connect(system))
1894         {
1895                 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
1896                 return 0;
1897         }
1898
1899         // Query preparation
1900         TString sourceName(source);
1901         Int_t nFields = 3;
1902         TString sqlQueryStart = Form("select filePath,size,fileChecksum from %s where",
1903                                                                 fConfig->GetFXSdbTable(system));
1904         TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1905                                                                 GetCurrentRun(), detector, id);
1906
1907         if (system == kDAQ)
1908         {
1909                 whereClause += Form(" and DAQsource=\"%s\"", source);
1910         }
1911         else if (system == kDCS)
1912         {
1913                 sourceName="none";
1914         }
1915         else if (system == kHLT)
1916         {
1917                 whereClause += Form(" and DDLnumbers=\"%s\"", source);
1918                 nFields = 3;
1919         }
1920
1921         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1922
1923         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1924
1925         // Query execution
1926         TSQLResult* aResult = 0;
1927         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
1928         if (!aResult) {
1929                 Log(detector, Form("GetFileName - Can't execute SQL query to %s database for: id = %s, source = %s",
1930                                 GetSystemName(system), id, sourceName.Data()));
1931                 return 0;
1932         }
1933
1934         if(aResult->GetRowCount() == 0)
1935         {
1936                 Log(detector,
1937                         Form("GetFileName - No entry in %s FXS db for: id = %s, source = %s",
1938                                 GetSystemName(system), id, sourceName.Data()));
1939                 delete aResult;
1940                 return 0;
1941         }
1942
1943         if (aResult->GetRowCount() > 1) {
1944                 Log(detector,
1945                         Form("GetFileName - More than one entry in %s FXS db for: id = %s, source = %s",
1946                                 GetSystemName(system), id, sourceName.Data()));
1947                 delete aResult;
1948                 return 0;
1949         }
1950
1951         if (aResult->GetFieldCount() != nFields) {
1952                 Log(detector,
1953                         Form("GetFileName - Wrong field count in %s FXS db for: id = %s, source = %s",
1954                                 GetSystemName(system), id, sourceName.Data()));
1955                 delete aResult;
1956                 return 0;
1957         }
1958
1959         TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
1960
1961         if (!aRow){
1962                 Log(detector, Form("GetFileName - Empty set result in %s FXS db from query: id = %s, source = %s",
1963                                 GetSystemName(system), id, sourceName.Data()));
1964                 delete aResult;
1965                 return 0;
1966         }
1967
1968         TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
1969         TString fileSize(aRow->GetField(1), aRow->GetFieldLength(1));
1970         TString fileChecksum(aRow->GetField(2), aRow->GetFieldLength(2));
1971
1972         delete aResult;
1973         delete aRow;
1974
1975         AliDebug(2, Form("filePath = %s; size = %s, fileChecksum = %s",
1976                                 filePath.Data(), fileSize.Data(), fileChecksum.Data()));
1977
1978         // retrieved file is renamed to make it unique
1979         TString localFileName = Form("%s_%s_%d_%s_%s.shuttle",
1980                                         GetSystemName(system), detector, GetCurrentRun(), id, sourceName.Data());
1981
1982
1983         // file retrieval from FXS
1984         UInt_t nRetries = 0;
1985         UInt_t maxRetries = 3;
1986         Bool_t result = kFALSE;
1987
1988         // copy!! if successful TSystem::Exec returns 0
1989         while(nRetries++ < maxRetries) {
1990                 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
1991                 result = RetrieveFile(system, filePath.Data(), localFileName.Data());
1992                 if(!result)
1993                 {
1994                         Log(detector, Form("GetFileName - Copy of file %s from %s FXS failed",
1995                                         filePath.Data(), GetSystemName(system)));
1996                         continue;
1997                 } 
1998
1999                 if (fileChecksum.Length()>0)
2000                 {
2001                         // compare md5sum of local file with the one stored in the FXS DB
2002                         Int_t md5Comp = gSystem->Exec(Form("md5sum %s/%s |grep %s 2>&1 > /dev/null",
2003                                                 GetShuttleTempDir(), localFileName.Data(), fileChecksum.Data()));
2004
2005                         if (md5Comp != 0)
2006                         {
2007                                 Log(detector, Form("GetFileName - md5sum of file %s does not match with local copy!",
2008                                                         filePath.Data()));
2009                                 result = kFALSE;
2010                                 continue;
2011                         }
2012                 } else {
2013                         Log(fCurrentDetector, Form("GetFile - md5sum of file %s not set in %s database, skipping comparison",
2014                                                         filePath.Data(), GetSystemName(system)));
2015                 }
2016                 if (result) break;
2017         }
2018
2019         if(!result) return 0;
2020
2021         fFXSCalled[system]=kTRUE;
2022         TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, sourceName.Data()));
2023         fFXSlist[system].Add(fileParams);
2024
2025         static TString fullLocalFileName;
2026         fullLocalFileName.Form("%s/%s", GetShuttleTempDir(), localFileName.Data());
2027
2028         Log(fCurrentDetector, Form("GetFile - Retrieved file with id %s and source %s from %s to %s", id, source, GetSystemName(system), fullLocalFileName.Data()));
2029
2030         return fullLocalFileName.Data();
2031 }
2032
2033 //______________________________________________________________________________________________
2034 Bool_t AliShuttle::RetrieveFile(UInt_t system, const char* fxsFileName, const char* localFileName)
2035 {
2036         //
2037         // Copies file from FXS to local Shuttle machine
2038         //
2039
2040         // check temp directory: trying to cd to temp; if it does not exist, create it
2041         AliDebug(2, Form("Copy file %s from %s FXS into %s/%s",
2042                         GetSystemName(system), fxsFileName, GetShuttleTempDir(), localFileName));
2043
2044         void* dir = gSystem->OpenDirectory(GetShuttleTempDir());
2045         if (dir == NULL) {
2046                 if (gSystem->mkdir(GetShuttleTempDir(), kTRUE)) {
2047                         AliError(Form("Can't open directory <%s>", GetShuttleTempDir()));
2048                         return kFALSE;
2049                 }
2050
2051         } else {
2052                 gSystem->FreeDirectory(dir);
2053         }
2054
2055         TString baseFXSFolder;
2056         if (system == kDAQ)
2057         {
2058                 baseFXSFolder = "FES/";
2059         }
2060         else if (system == kDCS)
2061         {
2062                 baseFXSFolder = "";
2063         }
2064         else if (system == kHLT)
2065         {
2066                 baseFXSFolder = "/opt/FXS/";
2067         }
2068
2069
2070         TString command = Form("scp -oPort=%d -2 %s@%s:%s%s %s/%s",
2071                 fConfig->GetFXSPort(system),
2072                 fConfig->GetFXSUser(system),
2073                 fConfig->GetFXSHost(system),
2074                 baseFXSFolder.Data(),
2075                 fxsFileName,
2076                 GetShuttleTempDir(),
2077                 localFileName);
2078
2079         AliDebug(2, Form("%s",command.Data()));
2080
2081         Bool_t result = (gSystem->Exec(command.Data()) == 0);
2082
2083         return result;
2084 }
2085
2086 //______________________________________________________________________________________________
2087 TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
2088 {
2089         //
2090         // Get sources producing the condition file Id from file exchange servers
2091         // if id is NULL all sources are returned (distinct)
2092         //
2093
2094         Log(detector, Form("GetFileSources - Retrieving sources with id %s from %s", id, GetSystemName(system)));
2095         
2096         // check if test mode should simulate a FXS error
2097         if (fTestMode & kErrorFXSSources)
2098         {
2099                 Log(detector, Form("GetFileSources - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2100                 return 0;
2101         }
2102
2103         if (system == kDCS)
2104         {
2105                 AliWarning("DCS system has only one source of data!");
2106                 TList *list = new TList();
2107                 list->SetOwner(1);
2108                 list->Add(new TObjString(" "));
2109                 return list;
2110         }
2111
2112         // check connection, in case connect
2113         if (!Connect(system))
2114         {
2115                 Log(detector, Form("GetFileSources - Couldn't connect to %s FXS database", GetSystemName(system)));
2116                 return NULL;
2117         }
2118
2119         TString sourceName = 0;
2120         if (system == kDAQ)
2121         {
2122                 sourceName = "DAQsource";
2123         } else if (system == kHLT)
2124         {
2125                 sourceName = "DDLnumbers";
2126         }
2127
2128         TString sqlQueryStart = Form("select distinct %s from %s where", sourceName.Data(), fConfig->GetFXSdbTable(system));
2129         TString whereClause = Form("run=%d and detector=\"%s\"",
2130                                 GetCurrentRun(), detector);
2131         if (id)
2132                 whereClause += Form(" and fileId=\"%s\"", id);
2133         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2134
2135         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2136
2137         // Query execution
2138         TSQLResult* aResult;
2139         aResult = fServer[system]->Query(sqlQuery);
2140         if (!aResult) {
2141                 Log(detector, Form("GetFileSources - Can't execute SQL query to %s database for id: %s",
2142                                 GetSystemName(system), id));
2143                 return 0;
2144         }
2145
2146         TList *list = new TList();
2147         list->SetOwner(1);
2148         
2149         if (aResult->GetRowCount() == 0)
2150         {
2151                 Log(detector,
2152                         Form("GetFileSources - No entry in %s FXS table for id: %s", GetSystemName(system), id));
2153                 delete aResult;
2154                 return list;
2155         }
2156
2157         Log(detector, Form("GetFileSources - Found %d sources", aResult->GetRowCount()));
2158
2159         TSQLRow* aRow;
2160         while ((aRow = aResult->Next()))
2161         {
2162
2163                 TString source(aRow->GetField(0), aRow->GetFieldLength(0));
2164                 AliDebug(2, Form("%s = %s", sourceName.Data(), source.Data()));
2165                 list->Add(new TObjString(source));
2166                 delete aRow;
2167         }
2168
2169         delete aResult;
2170
2171         return list;
2172 }
2173
2174 //______________________________________________________________________________________________
2175 TList* AliShuttle::GetFileIDs(Int_t system, const char* detector, const char* source)
2176 {
2177         //
2178         // Get all ids of condition files produced by a given source from file exchange servers
2179         //
2180         
2181         Log(detector, Form("GetFileIDs - Retrieving ids with source %s with %s", source, GetSystemName(system)));
2182
2183         // check if test mode should simulate a FXS error
2184         if (fTestMode & kErrorFXSSources)
2185         {
2186                 Log(detector, Form("GetFileIDs - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2187                 return 0;
2188         }
2189
2190         // check connection, in case connect
2191         if (!Connect(system))
2192         {
2193                 Log(detector, Form("GetFileIDs - Couldn't connect to %s FXS database", GetSystemName(system)));
2194                 return NULL;
2195         }
2196
2197         TString sourceName = 0;
2198         if (system == kDAQ)
2199         {
2200                 sourceName = "DAQsource";
2201         } else if (system == kHLT)
2202         {
2203                 sourceName = "DDLnumbers";
2204         }
2205
2206         TString sqlQueryStart = Form("select fileId from %s where", fConfig->GetFXSdbTable(system));
2207         TString whereClause = Form("run=%d and detector=\"%s\"",
2208                                 GetCurrentRun(), detector);
2209         if (sourceName.Length() > 0 && source)
2210                 whereClause += Form(" and %s=\"%s\"", sourceName.Data(), source);
2211         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2212
2213         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2214
2215         // Query execution
2216         TSQLResult* aResult;
2217         aResult = fServer[system]->Query(sqlQuery);
2218         if (!aResult) {
2219                 Log(detector, Form("GetFileIDs - Can't execute SQL query to %s database for source: %s",
2220                                 GetSystemName(system), source));
2221                 return 0;
2222         }
2223
2224         TList *list = new TList();
2225         list->SetOwner(1);
2226         
2227         if (aResult->GetRowCount() == 0)
2228         {
2229                 Log(detector,
2230                         Form("GetFileIDs - No entry in %s FXS table for source: %s", GetSystemName(system), source));
2231                 delete aResult;
2232                 return list;
2233         }
2234
2235         Log(detector, Form("GetFileIDs - Found %d ids", aResult->GetRowCount()));
2236
2237         TSQLRow* aRow;
2238
2239         while ((aRow = aResult->Next()))
2240         {
2241
2242                 TString id(aRow->GetField(0), aRow->GetFieldLength(0));
2243                 AliDebug(2, Form("fileId = %s", id.Data()));
2244                 list->Add(new TObjString(id));
2245                 delete aRow;
2246         }
2247
2248         delete aResult;
2249
2250         return list;
2251 }
2252
2253 //______________________________________________________________________________________________
2254 Bool_t AliShuttle::Connect(Int_t system)
2255 {
2256         // Connect to MySQL Server of the system's FXS MySQL databases
2257         // DAQ Logbook, Shuttle Logbook and DAQ FXS db are on the same host
2258         //
2259
2260         // check connection: if already connected return
2261         if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
2262
2263         TString dbHost, dbUser, dbPass, dbName;
2264
2265         if (system < 3) // FXS db servers
2266         {
2267                 dbHost = Form("mysql://%s:%d", fConfig->GetFXSdbHost(system), fConfig->GetFXSdbPort(system));
2268                 dbUser = fConfig->GetFXSdbUser(system);
2269                 dbPass = fConfig->GetFXSdbPass(system);
2270                 dbName =   fConfig->GetFXSdbName(system);
2271         } else { // Run & Shuttle logbook servers
2272         // TODO Will the Shuttle logbook server be the same as the Run logbook server ???
2273                 dbHost = Form("mysql://%s:%d", fConfig->GetDAQlbHost(), fConfig->GetDAQlbPort());
2274                 dbUser = fConfig->GetDAQlbUser();
2275                 dbPass = fConfig->GetDAQlbPass();
2276                 dbName =   fConfig->GetDAQlbDB();
2277         }
2278
2279         fServer[system] = TSQLServer::Connect(dbHost.Data(), dbUser.Data(), dbPass.Data());
2280         if (!fServer[system] || !fServer[system]->IsConnected()) {
2281                 if(system < 3)
2282                 {
2283                 AliError(Form("Can't establish connection to FXS database for %s",
2284                                         AliShuttleInterface::GetSystemName(system)));
2285                 } else {
2286                 AliError("Can't establish connection to Run logbook.");
2287                 }
2288                 if(fServer[system]) delete fServer[system];
2289                 return kFALSE;
2290         }
2291
2292         // Get tables
2293         TSQLResult* aResult=0;
2294         switch(system){
2295                 case kDAQ:
2296                         aResult = fServer[kDAQ]->GetTables(dbName.Data());
2297                         break;
2298                 case kDCS:
2299                         aResult = fServer[kDCS]->GetTables(dbName.Data());
2300                         break;
2301                 case kHLT:
2302                         aResult = fServer[kHLT]->GetTables(dbName.Data());
2303                         break;
2304                 default:
2305                         aResult = fServer[3]->GetTables(dbName.Data());
2306                         break;
2307         }
2308
2309         delete aResult;
2310         return kTRUE;
2311 }
2312
2313 //______________________________________________________________________________________________
2314 Bool_t AliShuttle::UpdateTable()
2315 {
2316         //
2317         // Update FXS table filling time_processed field in all rows corresponding to current run and detector
2318         //
2319
2320         Bool_t result = kTRUE;
2321
2322         for (UInt_t system=0; system<3; system++)
2323         {
2324                 if(!fFXSCalled[system]) continue;
2325
2326                 // check connection, in case connect
2327                 if (!Connect(system))
2328                 {
2329                         Log(fCurrentDetector, Form("UpdateTable - Couldn't connect to %s FXS database", GetSystemName(system)));
2330                         result = kFALSE;
2331                         continue;
2332                 }
2333
2334                 TTimeStamp now; // now
2335
2336                 // Loop on FXS list entries
2337                 TIter iter(&fFXSlist[system]);
2338                 TObjString *aFXSentry=0;
2339                 while ((aFXSentry = dynamic_cast<TObjString*> (iter.Next())))
2340                 {
2341                         TString aFXSentrystr = aFXSentry->String();
2342                         TObjArray *aFXSarray = aFXSentrystr.Tokenize("#!?!#");
2343                         if (!aFXSarray || aFXSarray->GetEntries() != 2 )
2344                         {
2345                                 Log(fCurrentDetector, Form("UpdateTable - error updating %s FXS entry. Check string: <%s>",
2346                                         GetSystemName(system), aFXSentrystr.Data()));
2347                                 if(aFXSarray) delete aFXSarray;
2348                                 result = kFALSE;
2349                                 continue;
2350                         }
2351                         const char* fileId = ((TObjString*) aFXSarray->At(0))->GetName();
2352                         const char* source = ((TObjString*) aFXSarray->At(1))->GetName();
2353
2354                         TString whereClause;
2355                         if (system == kDAQ)
2356                         {
2357                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
2358                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
2359                         }
2360                         else if (system == kDCS)
2361                         {
2362                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\";",
2363                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId);
2364                         }
2365                         else if (system == kHLT)
2366                         {
2367                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\";",
2368                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
2369                         }
2370
2371                         delete aFXSarray;
2372
2373                         TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
2374                                                                 now.GetSec(), whereClause.Data());
2375
2376                         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2377
2378                         // Query execution
2379                         TSQLResult* aResult;
2380                         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2381                         if (!aResult)
2382                         {
2383                                 Log(fCurrentDetector, Form("UpdateTable - %s db: can't execute SQL query <%s>",
2384                                                                 GetSystemName(system), sqlQuery.Data()));
2385                                 result = kFALSE;
2386                                 continue;
2387                         }
2388                         delete aResult;
2389                 }
2390         }
2391
2392         return result;
2393 }
2394
2395 //______________________________________________________________________________________________
2396 Bool_t AliShuttle::UpdateTableFailCase()
2397 {
2398         // Update FXS table filling time_processed field in all rows corresponding to current run and detector
2399         // this is called in case the preprocessor is declared failed for the current run, because
2400         // the fields are updated only in case of success
2401
2402         Bool_t result = kTRUE;
2403
2404         for (UInt_t system=0; system<3; system++)
2405         {
2406                 // check connection, in case connect
2407                 if (!Connect(system))
2408                 {
2409                         Log(fCurrentDetector, Form("UpdateTableFailCase - Couldn't connect to %s FXS database",
2410                                                         GetSystemName(system)));
2411                         result = kFALSE;
2412                         continue;
2413                 }
2414
2415                 TTimeStamp now; // now
2416
2417                 // Loop on FXS list entries
2418
2419                 TString whereClause = Form("where run=%d and detector=\"%s\";",
2420                                                 GetCurrentRun(), fCurrentDetector.Data());
2421
2422
2423                 TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
2424                                                         now.GetSec(), whereClause.Data());
2425
2426                 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2427
2428                 // Query execution
2429                 TSQLResult* aResult;
2430                 aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2431                 if (!aResult)
2432                 {
2433                         Log(fCurrentDetector, Form("UpdateTableFailCase - %s db: can't execute SQL query <%s>",
2434                                                         GetSystemName(system), sqlQuery.Data()));
2435                         result = kFALSE;
2436                         continue;
2437                 }
2438                 delete aResult;
2439         }
2440
2441         return result;
2442 }
2443
2444 //______________________________________________________________________________________________
2445 Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
2446 {
2447         //
2448         // Update Shuttle logbook filling detector or shuttle_done column
2449         // ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
2450         //
2451
2452         // check connection, in case connect
2453         if(!Connect(3)){
2454                 Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook.");
2455                 return kFALSE;
2456         }
2457
2458         TString detName(detector);
2459         TString setClause;
2460         if(detName == "shuttle_done")
2461         {
2462                 setClause = "set shuttle_done=1";
2463
2464                 // Send the information to ML
2465                 TMonaLisaText  mlStatus("SHUTTLE_status", "Done");
2466
2467                 TList mlList;
2468                 mlList.Add(&mlStatus);
2469
2470                 fMonaLisa->SendParameters(&mlList);
2471         } else {
2472                 TString statusStr(status);
2473                 if(statusStr.Contains("done", TString::kIgnoreCase) ||
2474                    statusStr.Contains("failed", TString::kIgnoreCase)){
2475                         setClause = Form("set %s=\"%s\"", detector, status);
2476                 } else {
2477                         Log("SHUTTLE",
2478                                 Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s",
2479                                         status, detector));
2480                         return kFALSE;
2481                 }
2482         }
2483
2484         TString whereClause = Form("where run=%d", GetCurrentRun());
2485
2486         TString sqlQuery = Form("update %s %s %s",
2487                                         fConfig->GetShuttlelbTable(), setClause.Data(), whereClause.Data());
2488
2489         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2490
2491         // Query execution
2492         TSQLResult* aResult;
2493         aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
2494         if (!aResult) {
2495                 Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data()));
2496                 return kFALSE;
2497         }
2498         delete aResult;
2499
2500         return kTRUE;
2501 }
2502
2503 //______________________________________________________________________________________________
2504 Int_t AliShuttle::GetCurrentRun() const
2505 {
2506         //
2507         // Get current run from logbook entry
2508         //
2509
2510         return fLogbookEntry ? fLogbookEntry->GetRun() : -1;
2511 }
2512
2513 //______________________________________________________________________________________________
2514 UInt_t AliShuttle::GetCurrentStartTime() const
2515 {
2516         //
2517         // get current start time
2518         //
2519
2520         return fLogbookEntry ? fLogbookEntry->GetStartTime() : 0;
2521 }
2522
2523 //______________________________________________________________________________________________
2524 UInt_t AliShuttle::GetCurrentEndTime() const
2525 {
2526         //
2527         // get current end time from logbook entry
2528         //
2529
2530         return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
2531 }
2532
2533 //______________________________________________________________________________________________
2534 void AliShuttle::Log(const char* detector, const char* message)
2535 {
2536         //
2537         // Fill log string with a message
2538         //
2539
2540         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
2541         if (dir == NULL) {
2542                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE)) {
2543                         AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
2544                         return;
2545                 }
2546
2547         } else {
2548                 gSystem->FreeDirectory(dir);
2549         }
2550
2551         TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
2552         if (GetCurrentRun() >= 0) 
2553                 toLog += Form("run %d - ", GetCurrentRun());
2554         toLog += Form("%s", message);
2555
2556         AliInfo(toLog.Data());
2557         
2558         // if we redirect the log output already to the file, leave here
2559         if (fOutputRedirected && strcmp(detector, "SHUTTLE") != 0)
2560                 return;
2561
2562         TString fileName = GetLogFileName(detector);
2563         
2564         gSystem->ExpandPathName(fileName);
2565
2566         ofstream logFile;
2567         logFile.open(fileName, ofstream::out | ofstream::app);
2568
2569         if (!logFile.is_open()) {
2570                 AliError(Form("Could not open file %s", fileName.Data()));
2571                 return;
2572         }
2573
2574         logFile << toLog.Data() << "\n";
2575
2576         logFile.close();
2577 }
2578
2579 //______________________________________________________________________________________________
2580 TString AliShuttle::GetLogFileName(const char* detector) const
2581 {
2582         // 
2583         // returns the name of the log file for a given sub detector
2584         //
2585         
2586         TString fileName;
2587         
2588         if (GetCurrentRun() >= 0) 
2589                 fileName.Form("%s/%s_%d.log", GetShuttleLogDir(), detector, GetCurrentRun());
2590         else
2591                 fileName.Form("%s/%s.log", GetShuttleLogDir(), detector);
2592
2593         return fileName;
2594 }
2595
2596 //______________________________________________________________________________________________
2597 Bool_t AliShuttle::Collect(Int_t run)
2598 {
2599         //
2600         // Collects conditions data for all UNPROCESSED run written to DAQ LogBook in case of run = -1 (default)
2601         // If a dedicated run is given this run is processed
2602         //
2603         // In operational mode, this is the Shuttle function triggered by the EOR signal.
2604         //
2605
2606         if (run == -1)
2607                 Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs");
2608         else
2609                 Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run));
2610
2611         SetLastAction("Starting");
2612
2613         TString whereClause("where shuttle_done=0");
2614         if (run != -1)
2615                 whereClause += Form(" and run=%d", run);
2616
2617         TObjArray shuttleLogbookEntries;
2618         if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries))
2619         {
2620                 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
2621                 return kFALSE;
2622         }
2623
2624         if (shuttleLogbookEntries.GetEntries() == 0)
2625         {
2626                 if (run == -1)
2627                         Log("SHUTTLE","Collect - Found no UNPROCESSED runs in Shuttle logbook");
2628                 else
2629                         Log("SHUTTLE", Form("Collect - Run %d is already DONE "
2630                                                 "or it does not exist in Shuttle logbook", run));
2631                 return kTRUE;
2632         }
2633
2634         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
2635                 fFirstUnprocessed[iDet] = kTRUE;
2636
2637         if (run != -1)
2638         {
2639                 // query Shuttle logbook for earlier runs, check if some detectors are unprocessed,
2640                 // flag them into fFirstUnprocessed array
2641                 TString whereClause(Form("where shuttle_done=0 and run < %d", run));
2642                 TObjArray tmpLogbookEntries;
2643                 if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries))
2644                 {
2645                         Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
2646                         return kFALSE;
2647                 }
2648
2649                 TIter iter(&tmpLogbookEntries);
2650                 AliShuttleLogbookEntry* anEntry = 0;
2651                 while ((anEntry = dynamic_cast<AliShuttleLogbookEntry*> (iter.Next())))
2652                 {
2653                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
2654                         {
2655                                 if (anEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
2656                                 {
2657                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
2658                                                         anEntry->GetRun(), GetDetName(iDet)));
2659                                         fFirstUnprocessed[iDet] = kFALSE;
2660                                 }
2661                         }
2662
2663                 }
2664
2665         }
2666
2667         if (!RetrieveConditionsData(shuttleLogbookEntries))
2668         {
2669                 Log("SHUTTLE", "Collect - Process of at least one run failed");
2670                 return kFALSE;
2671         }
2672
2673         Log("SHUTTLE", "Collect - Requested run(s) successfully processed");
2674         return kTRUE;
2675 }
2676
2677 //______________________________________________________________________________________________
2678 Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries)
2679 {
2680         //
2681         // Retrieve conditions data for all runs that aren't processed yet
2682         //
2683
2684         Bool_t hasError = kFALSE;
2685
2686         TIter iter(&dateEntries);
2687         AliShuttleLogbookEntry* anEntry;
2688
2689         while ((anEntry = (AliShuttleLogbookEntry*) iter.Next())){
2690                 if (!Process(anEntry)){
2691                         hasError = kTRUE;
2692                 }
2693
2694                 // clean SHUTTLE temp directory
2695                 TString filename = Form("%s/*.shuttle", GetShuttleTempDir());
2696                 RemoveFile(filename.Data());
2697         }
2698
2699         return hasError == kFALSE;
2700 }
2701
2702 //______________________________________________________________________________________________
2703 ULong_t AliShuttle::GetTimeOfLastAction() const
2704 {
2705         //
2706         // Gets time of last action
2707         //
2708
2709         ULong_t tmp;
2710
2711         fMonitoringMutex->Lock();
2712
2713         tmp = fLastActionTime;
2714
2715         fMonitoringMutex->UnLock();
2716
2717         return tmp;
2718 }
2719
2720 //______________________________________________________________________________________________
2721 const TString AliShuttle::GetLastAction() const
2722 {
2723         //
2724         // returns a string description of the last action
2725         //
2726
2727         TString tmp;
2728
2729         fMonitoringMutex->Lock();
2730         
2731         tmp = fLastAction;
2732         
2733         fMonitoringMutex->UnLock();
2734
2735         return tmp;
2736 }
2737
2738 //______________________________________________________________________________________________
2739 void AliShuttle::SetLastAction(const char* action)
2740 {
2741         //
2742         // updates the monitoring variables
2743         //
2744
2745         fMonitoringMutex->Lock();
2746
2747         fLastAction = action;
2748         fLastActionTime = time(0);
2749         
2750         fMonitoringMutex->UnLock();
2751 }
2752
2753 //______________________________________________________________________________________________
2754 const char* AliShuttle::GetRunParameter(const char* param)
2755 {
2756         //
2757         // returns run parameter read from DAQ logbook
2758         //
2759
2760         if(!fLogbookEntry) {
2761                 AliError("No logbook entry!");
2762                 return 0;
2763         }
2764
2765         return fLogbookEntry->GetRunParameter(param);
2766 }
2767
2768 //______________________________________________________________________________________________
2769 AliCDBEntry* AliShuttle::GetFromOCDB(const char* detector, const AliCDBPath& path)
2770 {
2771         //
2772         // returns object from OCDB valid for current run
2773         //
2774
2775         if (fTestMode & kErrorOCDB)
2776         {
2777                 Log(detector, "GetFromOCDB - In TESTMODE - Simulating error with OCDB");
2778                 return 0;
2779         }
2780         
2781         AliCDBStorage *sto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
2782         if (!sto)
2783         {
2784                 Log(detector, "GetFromOCDB - Cannot activate main OCDB for query!");
2785                 return 0;
2786         }
2787
2788         return dynamic_cast<AliCDBEntry*> (sto->Get(path, GetCurrentRun()));
2789 }
2790
2791 //______________________________________________________________________________________________
2792 Bool_t AliShuttle::SendMail()
2793 {
2794         //
2795         // sends a mail to the subdetector expert in case of preprocessor error
2796         //
2797         
2798         if (fTestMode != kNone)
2799                 return kTRUE;
2800
2801         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
2802         if (dir == NULL)
2803         {
2804                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
2805                 {
2806                         AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
2807                         return kFALSE;
2808                 }
2809
2810         } else {
2811                 gSystem->FreeDirectory(dir);
2812         }
2813
2814         TString bodyFileName;
2815         bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
2816         gSystem->ExpandPathName(bodyFileName);
2817
2818         ofstream mailBody;
2819         mailBody.open(bodyFileName, ofstream::out);
2820
2821         if (!mailBody.is_open())
2822         {
2823                 AliError(Form("Could not open mail body file %s", bodyFileName.Data()));
2824                 return kFALSE;
2825         }
2826
2827         TString to="";
2828         TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
2829         TObjString *anExpert=0;
2830         while ((anExpert = (TObjString*) iterExperts.Next()))
2831         {
2832                 to += Form("%s,", anExpert->GetName());
2833         }
2834         to.Remove(to.Length()-1);
2835         AliDebug(2, Form("to: %s",to.Data()));
2836
2837         if (to.IsNull()) {
2838                 AliInfo("List of detector responsibles not yet set!");
2839                 return kFALSE;
2840         }
2841
2842         TString cc="alberto.colla@cern.ch";
2843
2844         TString subject = Form("%s Shuttle preprocessor FAILED in run %d !",
2845                                 fCurrentDetector.Data(), GetCurrentRun());
2846         AliDebug(2, Form("subject: %s", subject.Data()));
2847
2848         TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
2849         body += Form("SHUTTLE just detected that your preprocessor "
2850                         "failed processing run %d!!\n\n", GetCurrentRun());
2851         body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n", fCurrentDetector.Data());
2852         body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
2853         body += Form("Find the %s log for the current run on \n\n"
2854                 "\thttp://pcalishuttle01.cern.ch:8880/logs/%s_%d.log \n\n", 
2855                 fCurrentDetector.Data(), fCurrentDetector.Data(), GetCurrentRun());
2856         body += Form("The last 10 lines of %s log file are following:\n\n");
2857
2858         AliDebug(2, Form("Body begin: %s", body.Data()));
2859
2860         mailBody << body.Data();
2861         mailBody.close();
2862         mailBody.open(bodyFileName, ofstream::out | ofstream::app);
2863
2864         TString logFileName = Form("%s/%s_%d.log", GetShuttleLogDir(), fCurrentDetector.Data(), GetCurrentRun());
2865         TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
2866         if (gSystem->Exec(tailCommand.Data()))
2867         {
2868                 mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
2869         }
2870
2871         TString endBody = Form("------------------------------------------------------\n\n");
2872         endBody += Form("In case of problems please contact the SHUTTLE core team.\n\n");
2873         endBody += "Please do not answer this message directly, it is automatically generated.\n\n";
2874         endBody += "Greetings,\n\n \t\t\tthe SHUTTLE\n";
2875
2876         AliDebug(2, Form("Body end: %s", endBody.Data()));
2877
2878         mailBody << endBody.Data();
2879
2880         mailBody.close();
2881
2882         // send mail!
2883         TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
2884                                                 subject.Data(),
2885                                                 cc.Data(),
2886                                                 to.Data(),
2887                                                 bodyFileName.Data());
2888         AliDebug(2, Form("mail command: %s", mailCommand.Data()));
2889
2890         Bool_t result = gSystem->Exec(mailCommand.Data());
2891
2892         return result == 0;
2893 }
2894
2895 //______________________________________________________________________________________________
2896 const char* AliShuttle::GetRunType()
2897 {
2898         //
2899         // returns run type read from "run type" logbook
2900         //
2901
2902         if(!fLogbookEntry) {
2903                 AliError("No logbook entry!");
2904                 return 0;
2905         }
2906
2907         return fLogbookEntry->GetRunType();
2908 }
2909
2910 //______________________________________________________________________________________________
2911 void AliShuttle::SetShuttleTempDir(const char* tmpDir)
2912 {
2913         //
2914         // sets Shuttle temp directory
2915         //
2916
2917         fgkShuttleTempDir = gSystem->ExpandPathName(tmpDir);
2918 }
2919
2920 //______________________________________________________________________________________________
2921 void AliShuttle::SetShuttleLogDir(const char* logDir)
2922 {
2923         //
2924         // sets Shuttle log directory
2925         //
2926
2927         fgkShuttleLogDir = gSystem->ExpandPathName(logDir);
2928 }