]> git.uio.no Git - u/mrichter/AliRoot.git/blob - SHUTTLE/AliShuttle.cxx
Number of columns in MySql Shuttle logbook increased by one (HLT added)
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttle.cxx
1 /**************************************************************************
2  * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
3  *                                                                        *
4  * Author: The ALICE Off-line Project.                                    *
5  * Contributors are mentioned in the code where appropriate.              *
6  *                                                                        *
7  * Permission to use, copy, modify and distribute this software and its   *
8  * documentation strictly for non-commercial purposes is hereby granted   *
9  * without fee, provided that the above copyright notice appears in all   *
10  * copies and that both the copyright notice and this permission notice   *
11  * appear in the supporting documentation. The authors make no claims     *
12  * about the suitability of this software for any purpose. It is          *
13  * provided "as is" without express or implied warranty.                  *
14  **************************************************************************/
15
16 /*
17 $Log$
18 Revision 1.48  2007/06/21 13:06:19  acolla
19 GetFileSources returns dummy list with 1 source if system=DCS (better than
20 returning error as it was)
21
22 Revision 1.47  2007/06/19 17:28:56  acolla
23 HLT updated; missing map bug removed.
24
25 Revision 1.46  2007/06/09 13:01:09  jgrosseo
26 Switching to retrieval of several DCS DPs at a time (multiDPrequest)
27
28 Revision 1.45  2007/05/30 06:35:20  jgrosseo
29 Adding functionality to the Shuttle/TestShuttle:
30 o) Function to retrieve list of sources from a given system (GetFileSources with id=0)
31 o) Function to retrieve list of IDs for a given source      (GetFileIDs)
32 These functions are needed for dealing with the tag files that are saved for the GRP preprocessor
33 Example code has been added to the TestProcessor in TestShuttle
34
35 Revision 1.44  2007/05/11 16:09:32  acolla
36 Reference files for ITS, MUON and PHOS are now stored in OfflineDetName/OnlineDetName/run_...
37 example: ITS/SPD/100_filename.root
38
39 Revision 1.43  2007/05/10 09:59:51  acolla
40 Various bug fixes in StoreRefFilesToGrid; Cleaning of reference storage before processing detector (CleanReferenceStorage)
41
42 Revision 1.42  2007/05/03 08:01:39  jgrosseo
43 typo in last commit :-(
44
45 Revision 1.41  2007/05/03 08:00:48  jgrosseo
46 fixing log message when pp want to skip dcs value retrieval
47
48 Revision 1.40  2007/04/27 07:06:48  jgrosseo
49 GetFileSources returns empty list in case of no files, but successful query
50 No mails sent in testmode
51
52 Revision 1.39  2007/04/17 12:43:57  acolla
53 Correction in StoreOCDB; change of text in mail to detector expert
54
55 Revision 1.38  2007/04/12 08:26:18  jgrosseo
56 updated comment
57
58 Revision 1.37  2007/04/10 16:53:14  jgrosseo
59 redirecting sub detector stdout, stderr to sub detector log file
60
61 Revision 1.35  2007/04/04 16:26:38  acolla
62 1. Re-organization of function calls in TestPreprocessor to make it more meaningful.
63 2. Added missing dependency in test preprocessors.
64 3. in AliShuttle.cxx: processing time and memory consumption info on a single line.
65
66 Revision 1.34  2007/04/04 10:33:36  jgrosseo
67 1) Storing of files to the Grid is now done _after_ your preprocessors succeeded. This is transparent, which means that you can still use the same functions (Store, StoreReferenceData) to store files to the Grid. However, the Shuttle first stores them locally and transfers them after the preprocessor finished. The return code of these two functions has changed from UInt_t to Bool_t which gives you the success of the storing.
68 In case of an error with the Grid, the Shuttle will retry the storing later, the preprocessor does not need to be run again.
69
70 2) The meaning of the return code of the preprocessor has changed. 0 is now success and any other value means failure. This value is stored in the log and you can use it to keep details about the error condition.
71
72 3) New function StoreReferenceFile to _directly_ store a file (without opening it) to the reference storage.
73
74 4) The memory usage of the preprocessor is monitored. If it exceeds 2 GB it is terminated.
75
76 5) New function AliPreprocessor::ProcessDCS(). If you do not need to have DCS data in all cases, you can skip the processing by implemting this function and returning kFALSE under certain conditions. E.g. if there is a certain run type.
77 If you always need DCS data (like before), you do not need to implement it.
78
79 6) The run type has been added to the monitoring page
80
81 Revision 1.33  2007/04/03 13:56:01  acolla
82 Grid Storage at the end of preprocessing. Added virtual method to disable DCS query according to the
83 run type.
84
85 Revision 1.32  2007/02/28 10:41:56  acolla
86 Run type field added in SHUTTLE framework. Run type is read from "run type" logbook and retrieved by
87 AliPreprocessor::GetRunType() function.
88 Added some ldap definition files.
89
90 Revision 1.30  2007/02/13 11:23:21  acolla
91 Moved getters and setters of Shuttle's main OCDB/Reference, local
92 OCDB/Reference, temp and log folders to AliShuttleInterface
93
94 Revision 1.27  2007/01/30 17:52:42  jgrosseo
95 adding monalisa monitoring
96
97 Revision 1.26  2007/01/23 19:20:03  acolla
98 Removed old ldif files, added TOF, MCH ldif files. Added some options in
99 AliShuttleConfig::Print. Added in Ali Shuttle: SetShuttleTempDir and
100 SetShuttleLogDir
101
102 Revision 1.25  2007/01/15 19:13:52  acolla
103 Moved some AliInfo to AliDebug in SendMail function
104
105 Revision 1.21  2006/12/07 08:51:26  jgrosseo
106 update (alberto):
107 table, db names in ldap configuration
108 added GRP preprocessor
109 DCS data can also be retrieved by data point
110
111 Revision 1.20  2006/11/16 16:16:48  jgrosseo
112 introducing strict run ordering flag
113 removed giving preprocessor name to preprocessor, they have to know their name themselves ;-)
114
115 Revision 1.19  2006/11/06 14:23:04  jgrosseo
116 major update (Alberto)
117 o) reading of run parameters from the logbook
118 o) online offline naming conversion
119 o) standalone DCSclient package
120
121 Revision 1.18  2006/10/20 15:22:59  jgrosseo
122 o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
123 o) Merging Collect, CollectAll, CollectNew function
124 o) Removing implementation of empty copy constructors (declaration still there!)
125
126 Revision 1.17  2006/10/05 16:20:55  jgrosseo
127 adapting to new CDB classes
128
129 Revision 1.16  2006/10/05 15:46:26  jgrosseo
130 applying to the new interface
131
132 Revision 1.15  2006/10/02 16:38:39  jgrosseo
133 update (alberto):
134 fixed memory leaks
135 storing of objects that failed to be stored to the grid before
136 interfacing of shuttle status table in daq system
137
138 Revision 1.14  2006/08/29 09:16:05  jgrosseo
139 small update
140
141 Revision 1.13  2006/08/15 10:50:00  jgrosseo
142 effc++ corrections (alberto)
143
144 Revision 1.12  2006/08/08 14:19:29  jgrosseo
145 Update to shuttle classes (Alberto)
146
147 - Possibility to set the full object's path in the Preprocessor's and
148 Shuttle's  Store functions
149 - Possibility to extend the object's run validity in the same classes
150 ("startValidity" and "validityInfinite" parameters)
151 - Implementation of the StoreReferenceData function to store reference
152 data in a dedicated CDB storage.
153
154 Revision 1.11  2006/07/21 07:37:20  jgrosseo
155 last run is stored after each run
156
157 Revision 1.10  2006/07/20 09:54:40  jgrosseo
158 introducing status management: The processing per subdetector is divided into several steps,
159 after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
160 can keep track of the number of failures and skips further processing after a certain threshold is
161 exceeded. These thresholds can be configured in LDAP.
162
163 Revision 1.9  2006/07/19 10:09:55  jgrosseo
164 new configuration, accesst to DAQ FES (Alberto)
165
166 Revision 1.8  2006/07/11 12:44:36  jgrosseo
167 adding parameters for extended validity range of data produced by preprocessor
168
169 Revision 1.7  2006/07/10 14:37:09  jgrosseo
170 small fix + todo comment
171
172 Revision 1.6  2006/07/10 13:01:41  jgrosseo
173 enhanced storing of last sucessfully processed run (alberto)
174
175 Revision 1.5  2006/07/04 14:59:57  jgrosseo
176 revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
177
178 Revision 1.4  2006/06/12 09:11:16  jgrosseo
179 coding conventions (Alberto)
180
181 Revision 1.3  2006/06/06 14:26:40  jgrosseo
182 o) removed files that were moved to STEER
183 o) shuttle updated to follow the new interface (Alberto)
184
185 Revision 1.2  2006/03/07 07:52:34  hristov
186 New version (B.Yordanov)
187
188 Revision 1.6  2005/11/19 17:19:14  byordano
189 RetrieveDATEEntries and RetrieveConditionsData added
190
191 Revision 1.5  2005/11/19 11:09:27  byordano
192 AliShuttle declaration added
193
194 Revision 1.4  2005/11/17 17:47:34  byordano
195 TList changed to TObjArray
196
197 Revision 1.3  2005/11/17 14:43:23  byordano
198 import to local CVS
199
200 Revision 1.1.1.1  2005/10/28 07:33:58  hristov
201 Initial import as subdirectory in AliRoot
202
203 Revision 1.2  2005/09/13 08:41:15  byordano
204 default startTime endTime added
205
206 Revision 1.4  2005/08/30 09:13:02  byordano
207 some docs added
208
209 Revision 1.3  2005/08/29 21:15:47  byordano
210 some docs added
211
212 */
213
214 //
215 // This class is the main manager for AliShuttle.
216 // It organizes the data retrieval from DCS and calls the
217 // interface methods of AliPreprocessor.
218 // For every detector in AliShuttleConfig (see AliShuttleConfig),
219 // data for its set of aliases is retrieved. If there is a registered
220 // AliPreprocessor for this detector then it will be used
221 // according to the schema (see AliPreprocessor).
222 // If there isn't a registered AliPreprocessor then the retrieved
223 // data is stored automatically to the underlying AliCDBStorage.
224 // For detSpec the alias name is used.
225 //
226
227 #include "AliShuttle.h"
228
229 #include "AliCDBManager.h"
230 #include "AliCDBStorage.h"
231 #include "AliCDBId.h"
232 #include "AliCDBRunRange.h"
233 #include "AliCDBPath.h"
234 #include "AliCDBEntry.h"
235 #include "AliShuttleConfig.h"
236 #include "DCSClient/AliDCSClient.h"
237 #include "AliLog.h"
238 #include "AliPreprocessor.h"
239 #include "AliShuttleStatus.h"
240 #include "AliShuttleLogbookEntry.h"
241
242 #include <TSystem.h>
243 #include <TObject.h>
244 #include <TString.h>
245 #include <TTimeStamp.h>
246 #include <TObjString.h>
247 #include <TSQLServer.h>
248 #include <TSQLResult.h>
249 #include <TSQLRow.h>
250 #include <TMutex.h>
251 #include <TSystemDirectory.h>
252 #include <TSystemFile.h>
253 #include <TFileMerger.h>
254 #include <TGrid.h>
255 #include <TGridResult.h>
256
257 #include <TMonaLisaWriter.h>
258
259 #include <fstream>
260
261 #include <sys/types.h>
262 #include <sys/wait.h>
263
264 ClassImp(AliShuttle)
265
266 //______________________________________________________________________________________________
267 AliShuttle::AliShuttle(const AliShuttleConfig* config,
268                 UInt_t timeout, Int_t retries):
269 fConfig(config),
270 fTimeout(timeout), fRetries(retries),
271 fPreprocessorMap(),
272 fLogbookEntry(0),
273 fCurrentDetector(),
274 fStatusEntry(0),
275 fMonitoringMutex(0),
276 fLastActionTime(0),
277 fLastAction(),
278 fMonaLisa(0),
279 fTestMode(kNone),
280 fReadTestMode(kFALSE),
281 fOutputRedirected(kFALSE)
282 {
283         //
284         // config: AliShuttleConfig used
285         // timeout: timeout used for AliDCSClient connection
286         // retries: the number of retries in case of connection error.
287         //
288
289         if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
290         for(int iSys=0;iSys<4;iSys++) {
291                 fServer[iSys]=0;
292                 if (iSys < 3)
293                         fFXSlist[iSys].SetOwner(kTRUE);
294         }
295         fPreprocessorMap.SetOwner(kTRUE);
296
297         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
298                 fFirstUnprocessed[iDet] = kFALSE;
299
300         fMonitoringMutex = new TMutex();
301 }
302
303 //______________________________________________________________________________________________
304 AliShuttle::~AliShuttle()
305 {
306         //
307         // destructor
308         //
309
310         fPreprocessorMap.DeleteAll();
311         for(int iSys=0;iSys<4;iSys++)
312                 if(fServer[iSys]) {
313                         fServer[iSys]->Close();
314                         delete fServer[iSys];
315                         fServer[iSys] = 0;
316                 }
317
318         if (fStatusEntry){
319                 delete fStatusEntry;
320                 fStatusEntry = 0;
321         }
322         
323         if (fMonitoringMutex) 
324         {
325                 delete fMonitoringMutex;
326                 fMonitoringMutex = 0;
327         }
328 }
329
330 //______________________________________________________________________________________________
331 void AliShuttle::RegisterPreprocessor(AliPreprocessor* preprocessor)
332 {
333         //
334         // Registers new AliPreprocessor.
335         // It uses GetName() for indentificator of the pre processor.
336         // The pre processor is registered it there isn't any other
337         // with the same identificator (GetName()).
338         //
339
340         const char* detName = preprocessor->GetName();
341         if(GetDetPos(detName) < 0)
342                 AliFatal(Form("********** !!!!! Invalid detector name: %s !!!!! **********", detName));
343
344         if (fPreprocessorMap.GetValue(detName)) {
345                 AliWarning(Form("AliPreprocessor %s is already registered!", detName));
346                 return;
347         }
348
349         fPreprocessorMap.Add(new TObjString(detName), preprocessor);
350 }
351 //______________________________________________________________________________________________
352 Bool_t AliShuttle::Store(const AliCDBPath& path, TObject* object,
353                 AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite)
354 {
355         // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for
356         // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored
357         // using this function. Use StoreReferenceData instead!
358         // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
359         // finishes the data are transferred to the main storage (Grid).
360
361         return StoreLocally(fgkLocalCDB, path, object, metaData, validityStart, validityInfinite);
362 }
363
364 //______________________________________________________________________________________________
365 Bool_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData)
366 {
367         // Stores a CDB object in the storage for reference data. This objects will not be available during
368         // offline reconstrunction. Use this function for reference data only!
369         // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
370         // finishes the data are transferred to the main storage (Grid).
371
372         return StoreLocally(fgkLocalRefStorage, path, object, metaData);
373 }
374
375 //______________________________________________________________________________________________
376 Bool_t AliShuttle::StoreLocally(const TString& localUri,
377                         const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
378                         Int_t validityStart, Bool_t validityInfinite)
379 {
380         // Store object temporarily in local storage. Parameters are passed by Store and StoreReferenceData functions.
381         // when the preprocessor finishes the data are transferred to the main storage (Grid).
382         // The parameters are:
383         //   1) Uri of the backup storage (Local)
384         //   2) the object's path.
385         //   3) the object to be stored
386         //   4) the metaData to be associated with the object
387         //   5) the validity start run number w.r.t. the current run,
388         //      if the data is valid only for this run leave the default 0
389         //   6) specifies if the calibration data is valid for infinity (this means until updated),
390         //      typical for calibration runs, the default is kFALSE
391         //
392         // returns 0 if fail, 1 otherwise
393
394         if (fTestMode & kErrorStorage)
395         {
396                 Log(fCurrentDetector, "StoreLocally - In TESTMODE - Simulating error while storing locally");
397                 return kFALSE;
398         }
399         
400         const char* cdbType = (localUri == fgkLocalCDB) ? "CDB" : "Reference";
401
402         Int_t firstRun = GetCurrentRun() - validityStart;
403         if(firstRun < 0) {
404                 AliWarning("First valid run happens to be less than 0! Setting it to 0.");
405                 firstRun=0;
406         }
407
408         Int_t lastRun = -1;
409         if(validityInfinite) {
410                 lastRun = AliCDBRunRange::Infinity();
411         } else {
412                 lastRun = GetCurrentRun();
413         }
414
415         // Version is set to current run, it will be used later to transfer data to Grid
416         AliCDBId id(path, firstRun, lastRun, GetCurrentRun(), -1);
417
418         if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){
419                 TObjString runUsed = Form("%d", GetCurrentRun());
420                 metaData->SetProperty("RunUsed(TObjString)", runUsed.Clone());
421         }
422
423         Bool_t result = kFALSE;
424
425         if (!(AliCDBManager::Instance()->GetStorage(localUri))) {
426                 Log("SHUTTLE", Form("StoreLocally - Cannot activate local %s storage", cdbType));
427         } else {
428                 result = AliCDBManager::Instance()->GetStorage(localUri)
429                                         ->Put(object, id, metaData);
430         }
431
432         if(!result) {
433
434                 Log(fCurrentDetector, Form("StoreLocally - Can't store object <%s>!", id.ToString().Data()));
435         }
436
437         return result;
438 }
439
440 //______________________________________________________________________________________________
441 Bool_t AliShuttle::StoreOCDB()
442 {
443         //
444         // Called when preprocessor ends successfully or when previous storage attempt failed (kStoreError status)
445         // Calls underlying StoreOCDB(const char*) function twice, for OCDB and Reference storage.
446         // Then calls StoreRefFilesToGrid to store reference files. 
447         //
448         
449         if (fTestMode & kErrorGrid)
450         {
451                 Log("SHUTTLE", "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
452                 Log(fCurrentDetector, "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
453                 return kFALSE;
454         }
455         
456         Log("SHUTTLE","Storing OCDB data ...");
457         Bool_t resultCDB = StoreOCDB(fgkMainCDB);
458
459         Log("SHUTTLE","Storing reference data ...");
460         Bool_t resultRef = StoreOCDB(fgkMainRefStorage);
461         
462         Log("SHUTTLE","Storing reference files ...");
463         Bool_t resultRefFiles = StoreRefFilesToGrid();
464         
465         return resultCDB && resultRef && resultRefFiles;
466 }
467
468 //______________________________________________________________________________________________
469 Bool_t AliShuttle::StoreOCDB(const TString& gridURI)
470 {
471         //
472         // Called by StoreOCDB(), performs actual storage to the main OCDB and reference storages (Grid)
473         //
474
475         TObjArray* gridIds=0;
476
477         Bool_t result = kTRUE;
478
479         const char* type = 0;
480         TString localURI;
481         if(gridURI == fgkMainCDB) {
482                 type = "OCDB";
483                 localURI = fgkLocalCDB;
484         } else if(gridURI == fgkMainRefStorage) {
485                 type = "reference";
486                 localURI = fgkLocalRefStorage;
487         } else {
488                 AliError(Form("Invalid storage URI: %s", gridURI.Data()));
489                 return kFALSE;
490         }
491
492         AliCDBManager* man = AliCDBManager::Instance();
493
494         AliCDBStorage *gridSto = man->GetStorage(gridURI);
495         if(!gridSto) {
496                 Log("SHUTTLE",
497                         Form("StoreOCDB - cannot activate main %s storage", type));
498                 return kFALSE;
499         }
500
501         gridIds = gridSto->GetQueryCDBList();
502
503         // get objects previously stored in local CDB
504         AliCDBStorage *localSto = man->GetStorage(localURI);
505         if(!localSto) {
506                 Log("SHUTTLE",
507                         Form("StoreOCDB - cannot activate local %s storage", type));
508                 return kFALSE;
509         }
510         AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*");
511         // Local objects were stored with current run as Grid version!
512         TList* localEntries = localSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun());
513         localEntries->SetOwner(1);
514
515         // loop on local stored objects
516         TIter localIter(localEntries);
517         AliCDBEntry *aLocEntry = 0;
518         while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){
519                 aLocEntry->SetOwner(1);
520                 AliCDBId aLocId = aLocEntry->GetId();
521                 aLocEntry->SetVersion(-1);
522                 aLocEntry->SetSubVersion(-1);
523
524                 // If local object is valid up to infinity we store it only if it is
525                 // the first unprocessed run!
526                 if (aLocId.GetLastRun() == AliCDBRunRange::Infinity() &&
527                         !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
528                 {
529                         Log("SHUTTLE", Form("StoreOCDB - %s: object %s has validity infinite but "
530                                                 "there are previous unprocessed runs!",
531                                                 fCurrentDetector.Data(), aLocId.GetPath().Data()));
532                         continue;
533                 }
534
535                 // loop on Grid valid Id's
536                 Bool_t store = kTRUE;
537                 TIter gridIter(gridIds);
538                 AliCDBId* aGridId = 0;
539                 while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
540                         if(aGridId->GetPath() != aLocId.GetPath()) continue;
541                         // skip all objects valid up to infinity
542                         if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
543                         // if we get here, it means there's already some more recent object stored on Grid!
544                         store = kFALSE;
545                         break;
546                 }
547
548                 // If we get here, the file can be stored!
549                 Bool_t storeOk = gridSto->Put(aLocEntry);
550                 if(!store || storeOk){
551
552                         if (!store)
553                         {
554                                 Log(fCurrentDetector.Data(),
555                                         Form("StoreOCDB - A more recent object already exists in %s storage: <%s>",
556                                                 type, aGridId->ToString().Data()));
557                         } else {
558                                 Log("SHUTTLE",
559                                         Form("StoreOCDB - Object <%s> successfully put into %s storage",
560                                                 aLocId.ToString().Data(), type));
561                                 Log(fCurrentDetector.Data(),
562                                         Form("StoreOCDB - Object <%s> successfully put into %s storage",
563                                                 aLocId.ToString().Data(), type));
564                         }
565
566                         // removing local filename...
567                         TString filename;
568                         localSto->IdToFilename(aLocId, filename);
569                         AliInfo(Form("Removing local file %s", filename.Data()));
570                         RemoveFile(filename.Data());
571                         continue;
572                 } else  {
573                         Log("SHUTTLE",
574                                 Form("StoreOCDB - Grid %s storage of object <%s> failed",
575                                         type, aLocId.ToString().Data()));
576                         Log(fCurrentDetector.Data(),
577                                 Form("StoreOCDB - Grid %s storage of object <%s> failed",
578                                         type, aLocId.ToString().Data()));
579                         result = kFALSE;
580                 }
581         }
582         localEntries->Clear();
583
584         return result;
585 }
586
587 //______________________________________________________________________________________________
588 Bool_t AliShuttle::CleanReferenceStorage(const char* detector)
589 {
590         // clears the directory used to store reference files of a given subdetector
591   
592         AliCDBManager* man = AliCDBManager::Instance();
593         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
594         TString localBaseFolder = sto->GetBaseFolder();
595
596         TString targetDir = GetRefFilePrefix(localBaseFolder.Data(), detector);
597         
598         Log("SHUTTLE", Form("Cleaning %s", targetDir.Data()));
599
600         TString begin;
601         begin.Form("%d_", GetCurrentRun());
602         
603         TSystemDirectory* baseDir = new TSystemDirectory("/", targetDir);
604         if (!baseDir)
605                 return kTRUE;
606                 
607         TList* dirList = baseDir->GetListOfFiles();
608         delete baseDir;
609         
610         if (!dirList) return kTRUE;
611                         
612         if (dirList->GetEntries() < 3) 
613         {
614                 delete dirList;
615                 return kTRUE;
616         }
617                                 
618         Int_t nDirs = 0, nDel = 0;
619         TIter dirIter(dirList);
620         TSystemFile* entry = 0;
621
622         Bool_t success = kTRUE;
623         
624         while ((entry = dynamic_cast<TSystemFile*> (dirIter.Next())))
625         {                                       
626                 if (entry->IsDirectory())
627                         continue;
628                 
629                 TString fileName(entry->GetName());
630                 if (!fileName.BeginsWith(begin))
631                         continue;
632                         
633                 nDirs++;
634                                                 
635                 // delete file
636                 Int_t result = gSystem->Unlink(fileName.Data());
637                 
638                 if (result)
639                 {
640                         Log("SHUTTLE", Form("Could not delete file %s!", fileName.Data()));
641                         success = kFALSE;
642                 } else {
643                         nDel++;
644                 }
645         }
646
647         if(nDirs > 0)
648                 Log("SHUTTLE", Form("CleanReferenceStorage - %d (over %d) reference files in folder %s were deleted.", 
649                         nDel, nDirs, targetDir.Data()));
650
651                 
652         delete dirList;
653         return success;
654
655
656
657
658
659
660   Int_t result = gSystem->GetPathInfo(targetDir, 0, (Long64_t*) 0, 0, 0);
661   if (result == 0)
662   {
663     // delete directory
664     result = gSystem->Exec(Form("rm -r %s", targetDir.Data()));
665     if (result != 0)
666     {  
667       Log("SHUTTLE", Form("StoreReferenceFile - Could not clear directory %s", targetDir.Data()));
668       return kFALSE;
669     }
670   }
671
672   result = gSystem->mkdir(targetDir, kTRUE);
673   if (result != 0)
674   {
675     Log("SHUTTLE", Form("StoreReferenceFile - Error creating base directory %s", targetDir.Data()));
676     return kFALSE;
677   }
678         
679   return kTRUE;
680 }
681
682 //______________________________________________________________________________________________
683 Bool_t AliShuttle::StoreReferenceFile(const char* detector, const char* localFile, const char* gridFileName)
684 {
685         //
686         // Stores reference file directly (without opening it). This function stores the file locally.
687         //
688         // The file is stored under the following location: 
689         // <base folder of local reference storage>/<DET>/<RUN#>_<gridFileName>
690         // where <gridFileName> is the second parameter given to the function
691         // 
692         
693         if (fTestMode & kErrorStorage)
694         {
695                 Log(fCurrentDetector, "StoreReferenceFile - In TESTMODE - Simulating error while storing locally");
696                 return kFALSE;
697         }
698         
699         AliCDBManager* man = AliCDBManager::Instance();
700         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
701         
702         TString localBaseFolder = sto->GetBaseFolder();
703         
704         TString targetDir = GetRefFilePrefix(localBaseFolder.Data(), detector); 
705         
706         //try to open folder, if does not exist
707         void* dir = gSystem->OpenDirectory(targetDir.Data());
708         if (dir == NULL) {
709                 if (gSystem->mkdir(targetDir.Data(), kTRUE)) {
710                         Log("SHUTTLE", Form("Can't open directory <%s>", targetDir.Data()));
711                         return kFALSE;
712                 }
713
714         } else {
715                 gSystem->FreeDirectory(dir);
716         }
717
718         TString target;
719         target.Form("%s/%d_%s", targetDir.Data(), GetCurrentRun(), gridFileName);
720         
721         Int_t result = gSystem->GetPathInfo(localFile, 0, (Long64_t*) 0, 0, 0);
722         if (result)
723         {
724                 Log("SHUTTLE", Form("StoreReferenceFile - %s does not exist", localFile));
725                 return kFALSE;
726         }
727
728         result = gSystem->CopyFile(localFile, target);
729
730         if (result == 0)
731         {
732                 Log("SHUTTLE", Form("StoreReferenceFile - File %s stored locally to %s", localFile, target.Data()));
733                 return kTRUE;
734         }
735         else
736         {
737                 Log("SHUTTLE", Form("StoreReferenceFile - Could not store file %s to %s!. Error code = %d", 
738                                 localFile, target.Data(), result));
739                 return kFALSE;
740         }       
741 }
742
743 //______________________________________________________________________________________________
744 Bool_t AliShuttle::StoreRefFilesToGrid()
745 {
746         //
747         // Transfers the reference file to the Grid.
748         //
749         // The files are stored under the following location: 
750         // <base folder of reference storage>/<DET>/<RUN#>_<gridFileName>
751         //
752         
753         AliCDBManager* man = AliCDBManager::Instance();
754         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
755         if (!sto)
756                 return kFALSE;
757         TString localBaseFolder = sto->GetBaseFolder();
758                 
759         TString dir = GetRefFilePrefix(localBaseFolder.Data(), fCurrentDetector.Data());
760                 
761         AliCDBStorage* gridSto = man->GetStorage(fgkMainRefStorage);
762         if (!gridSto)
763                 return kFALSE;
764         
765         TString gridBaseFolder = gridSto->GetBaseFolder();
766
767         TString alienDir = GetRefFilePrefix(gridBaseFolder.Data(), fCurrentDetector.Data());
768         
769         TString begin;
770         begin.Form("%d_", GetCurrentRun());
771         
772         TSystemDirectory* baseDir = new TSystemDirectory("/", dir);
773         if (!baseDir)
774                 return kTRUE;
775                 
776         TList* dirList = baseDir->GetListOfFiles();
777         delete baseDir;
778         
779         if (!dirList) return kTRUE;
780                 
781         if (dirList->GetEntries() < 3) 
782         {
783                 delete dirList;
784                 return kTRUE;
785         }
786                         
787         if (!gGrid)
788         { 
789                 Log("SHUTTLE", "Connection to Grid failed: Cannot continue!");
790                 delete dirList;
791                 return kFALSE;
792         }
793         
794         Int_t nDirs = 0, nTransfer = 0;
795         TIter dirIter(dirList);
796         TSystemFile* entry = 0;
797
798         Bool_t success = kTRUE;
799         Bool_t first = kTRUE;
800         
801         while ((entry = dynamic_cast<TSystemFile*> (dirIter.Next())))
802         {                       
803                 if (entry->IsDirectory())
804                         continue;
805                         
806                 TString fileName(entry->GetName());
807                 if (!fileName.BeginsWith(begin))
808                         continue;
809                         
810                 nDirs++;
811                         
812                 if (first)
813                 {
814                         first = kFALSE;
815                         // check that DET folder exists, otherwise create it
816                         TGridResult* result = gGrid->Ls(alienDir.Data(), "a");
817                         
818                         if (!result)
819                         {
820                                 delete dirList;
821                                 return kFALSE;
822                         }
823                         
824                         if (!result->GetFileName(1)) // TODO: It looks like element 0 is always 0!!
825                         {
826                                 if (!gGrid->Mkdir(alienDir.Data(),"",0))
827                                 {
828                                         Log("SHUTTLE", Form("StoreRefFilesToGrid - Cannot create directory %s",
829                                                         alienDir.Data()));
830                                         delete dirList;
831                                         return kFALSE;
832                                 } else {
833                                         Log("SHUTTLE",Form("Folder %s created", alienDir.Data()));
834                                 }
835                                 
836                         } else {
837                                         Log("SHUTTLE",Form("Folder %s found", alienDir.Data()));
838                         }
839                 }
840                         
841                 TString fullLocalPath;
842                 fullLocalPath.Form("%s/%s", dir.Data(), fileName.Data());
843                 
844                 TString fullGridPath;
845                 fullGridPath.Form("alien://%s/%s", alienDir.Data(), fileName.Data());
846
847                 TFileMerger fileMerger;
848                 Bool_t result = fileMerger.Cp(fullLocalPath, fullGridPath);
849                 
850                 if (result)
851                 {
852                         Log("SHUTTLE", Form("StoreRefFilesToGrid - Copying local file %s to %s succeeded!", fullLocalPath.Data(), fullGridPath.Data()));
853                         RemoveFile(fullLocalPath);
854                         nTransfer++;
855                 }
856                 else
857                 {
858                         Log("SHUTTLE", Form("StoreRefFilesToGrid - Copying local file %s to %s FAILED!", fullLocalPath.Data(), fullGridPath.Data()));
859                         success = kFALSE;
860                 }
861         }
862
863         Log("SHUTTLE", Form("StoreRefFilesToGrid - %d (over %d) reference files in folder %s copied to Grid.", nTransfer, nDirs, dir.Data()));
864
865                 
866         delete dirList;
867         return success;
868 }
869
870 //______________________________________________________________________________________________
871 const char* AliShuttle::GetRefFilePrefix(const char* base, const char* detector)
872 {
873         //
874         // Get folder name of reference files 
875         //
876
877         TString offDetStr(GetOfflineDetName(detector));
878         TString dir;
879         if (offDetStr == "ITS" || offDetStr == "MUON" || offDetStr == "PHOS")
880         {
881                 dir.Form("%s/%s/%s", base, offDetStr.Data(), detector);
882         } else {
883                 dir.Form("%s/%s", base, offDetStr.Data());
884         }
885         
886         return dir.Data();
887         
888
889 }
890 //______________________________________________________________________________________________
891 void AliShuttle::CleanLocalStorage(const TString& uri)
892 {
893         //
894         // Called in case the preprocessor is declared failed. Remove remaining objects from the local storages.
895         //
896
897         const char* type = 0;
898         if(uri == fgkLocalCDB) {
899                 type = "OCDB";
900         } else if(uri == fgkLocalRefStorage) {
901                 type = "Reference";
902         } else {
903                 AliError(Form("Invalid storage URI: %s", uri.Data()));
904                 return;
905         }
906
907         AliCDBManager* man = AliCDBManager::Instance();
908
909         // open local storage
910         AliCDBStorage *localSto = man->GetStorage(uri);
911         if(!localSto) {
912                 Log("SHUTTLE",
913                         Form("CleanLocalStorage - cannot activate local %s storage", type));
914                 return;
915         }
916
917         TString filename(Form("%s/%s/*/Run*_v%d_s*.root",
918                 localSto->GetBaseFolder().Data(), GetOfflineDetName(fCurrentDetector.Data()), GetCurrentRun()));
919
920         AliInfo(Form("filename = %s", filename.Data()));
921
922         AliInfo(Form("Removing remaining local files from run %d and detector %s ...",
923                 GetCurrentRun(), fCurrentDetector.Data()));
924
925         RemoveFile(filename.Data());
926
927 }
928
929 //______________________________________________________________________________________________
930 void AliShuttle::RemoveFile(const char* filename)
931 {
932         //
933         // removes local file
934         //
935
936         TString command(Form("rm -f %s", filename));
937
938         Int_t result = gSystem->Exec(command.Data());
939         if(result != 0)
940         {
941                 Log("SHUTTLE", Form("RemoveFile - %s: Cannot remove file %s!",
942                         fCurrentDetector.Data(), filename));
943         }
944 }
945
946 //______________________________________________________________________________________________
947 AliShuttleStatus* AliShuttle::ReadShuttleStatus()
948 {
949         //
950         // Reads the AliShuttleStatus from the CDB
951         //
952
953         if (fStatusEntry){
954                 delete fStatusEntry;
955                 fStatusEntry = 0;
956         }
957
958         fStatusEntry = AliCDBManager::Instance()->GetStorage(GetLocalCDB())
959                 ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
960
961         if (!fStatusEntry) return 0;
962         fStatusEntry->SetOwner(1);
963
964         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
965         if (!status) {
966                 AliError("Invalid object stored to CDB!");
967                 return 0;
968         }
969
970         return status;
971 }
972
973 //______________________________________________________________________________________________
974 Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status)
975 {
976         //
977         // writes the status for one subdetector
978         //
979
980         if (fStatusEntry){
981                 delete fStatusEntry;
982                 fStatusEntry = 0;
983         }
984
985         Int_t run = GetCurrentRun();
986
987         AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
988
989         fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
990         fStatusEntry->SetOwner(1);
991
992         UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
993
994         if (!result) {
995                 Log("SHUTTLE", Form("WriteShuttleStatus - Failed for %s, run %d",
996                                                 fCurrentDetector.Data(), run));
997                 return kFALSE;
998         }
999         
1000         SendMLInfo();
1001
1002         return kTRUE;
1003 }
1004
1005 //______________________________________________________________________________________________
1006 void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount)
1007 {
1008         //
1009         // changes the AliShuttleStatus for the given detector and run to the given status
1010         //
1011
1012         if (!fStatusEntry){
1013                 AliError("UNEXPECTED: fStatusEntry empty");
1014                 return;
1015         }
1016
1017         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1018
1019         if (!status){
1020                 Log("SHUTTLE", "UNEXPECTED: status could not be read from current CDB entry");
1021                 return;
1022         }
1023
1024         TString actionStr = Form("UpdateShuttleStatus - %s: Changing state from %s to %s",
1025                                 fCurrentDetector.Data(),
1026                                 status->GetStatusName(),
1027                                 status->GetStatusName(newStatus));
1028         Log("SHUTTLE", actionStr);
1029         SetLastAction(actionStr);
1030
1031         status->SetStatus(newStatus);
1032         if (increaseCount) status->IncreaseCount();
1033
1034         AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
1035
1036         SendMLInfo();
1037 }
1038
1039 //______________________________________________________________________________________________
1040 void AliShuttle::SendMLInfo()
1041 {
1042         //
1043         // sends ML information about the current status of the current detector being processed
1044         //
1045         
1046         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1047         
1048         if (!status){
1049                 Log("SHUTTLE", "SendMLInfo - UNEXPECTED: status could not be read from current CDB entry");
1050                 return;
1051         }
1052         
1053         TMonaLisaText  mlStatus(Form("%s_status", fCurrentDetector.Data()), status->GetStatusName());
1054         TMonaLisaValue mlRetryCount(Form("%s_count", fCurrentDetector.Data()), status->GetCount());
1055
1056         TList mlList;
1057         mlList.Add(&mlStatus);
1058         mlList.Add(&mlRetryCount);
1059
1060         fMonaLisa->SendParameters(&mlList);
1061 }
1062
1063 //______________________________________________________________________________________________
1064 Bool_t AliShuttle::ContinueProcessing()
1065 {
1066         // this function reads the AliShuttleStatus information from CDB and
1067         // checks if the processing should be continued
1068         // if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
1069
1070         if (!fConfig->HostProcessDetector(fCurrentDetector)) return kFALSE;
1071
1072         AliPreprocessor* aPreprocessor =
1073                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
1074         if (!aPreprocessor)
1075         {
1076                 AliInfo(Form("%s: no preprocessor registered", fCurrentDetector.Data()));
1077                 return kFALSE;
1078         }
1079
1080         AliShuttleLogbookEntry::Status entryStatus =
1081                 fLogbookEntry->GetDetectorStatus(fCurrentDetector);
1082
1083         if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
1084                 AliInfo(Form("ContinueProcessing - %s is %s",
1085                                 fCurrentDetector.Data(),
1086                                 fLogbookEntry->GetDetectorStatusName(entryStatus)));
1087                 return kFALSE;
1088         }
1089
1090         // if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state
1091
1092         // check if current run is first unprocessed run for current detector
1093         if (fConfig->StrictRunOrder(fCurrentDetector) &&
1094                 !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
1095         {
1096                 if (fTestMode == kNone)
1097                 {
1098                         Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering but this is not the first unprocessed run!"));
1099                         return kFALSE;
1100                 }
1101                 else
1102                 {
1103                         Log("SHUTTLE", Form("ContinueProcessing - In TESTMODE - Although %s requires strict run ordering and this is not the first unprocessed run, the SHUTTLE continues"));
1104                 }
1105         }
1106
1107         AliShuttleStatus* status = ReadShuttleStatus();
1108         if (!status) {
1109                 // first time
1110                 Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
1111                                 fCurrentDetector.Data()));
1112                 status = new AliShuttleStatus(AliShuttleStatus::kStarted);
1113                 return WriteShuttleStatus(status);
1114         }
1115
1116         // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
1117         // If it happens it may mean Logbook updating failed... let's do it now!
1118         if (status->GetStatus() == AliShuttleStatus::kDone ||
1119             status->GetStatus() == AliShuttleStatus::kFailed){
1120                 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
1121                                         fCurrentDetector.Data(),
1122                                         status->GetStatusName(status->GetStatus())));
1123                 UpdateShuttleLogbook(fCurrentDetector.Data(),
1124                                         status->GetStatusName(status->GetStatus()));
1125                 return kFALSE;
1126         }
1127
1128         if (status->GetStatus() == AliShuttleStatus::kStoreError) {
1129                 Log("SHUTTLE",
1130                         Form("ContinueProcessing - %s: Grid storage of one or more objects failed. Trying again now",
1131                                 fCurrentDetector.Data()));
1132                 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
1133                 if (StoreOCDB()){
1134                         Log("SHUTTLE", Form("ContinueProcessing - %s: all objects successfully stored into main storage",
1135                                 fCurrentDetector.Data()));
1136                         UpdateShuttleStatus(AliShuttleStatus::kDone);
1137                         UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE");
1138                 } else {
1139                         Log("SHUTTLE",
1140                                 Form("ContinueProcessing - %s: Grid storage failed again",
1141                                         fCurrentDetector.Data()));
1142                         UpdateShuttleStatus(AliShuttleStatus::kStoreError);
1143                 }
1144                 return kFALSE;
1145         }
1146
1147         // if we get here, there is a restart
1148         Bool_t cont = kFALSE;
1149
1150         // abort conditions
1151         if (status->GetCount() >= fConfig->GetMaxRetries()) {
1152                 Log("SHUTTLE", Form("ContinueProcessing - %s failed %d times in status %s - "
1153                                 "Updating Shuttle Logbook", fCurrentDetector.Data(),
1154                                 status->GetCount(), status->GetStatusName()));
1155                 UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
1156                 UpdateShuttleStatus(AliShuttleStatus::kFailed);
1157
1158                 // there may still be objects in local OCDB and reference storage
1159                 // and FXS databases may be not updated: do it now!
1160                 
1161                 // TODO Currently disabled, we want to keep files in case of failure!
1162                 // CleanLocalStorage(fgkLocalCDB);
1163                 // CleanLocalStorage(fgkLocalRefStorage);
1164                 // UpdateTableFailCase();
1165                 
1166                 // Send mail to detector expert!
1167                 AliInfo(Form("Sending mail to %s expert...", fCurrentDetector.Data()));
1168                 if (!SendMail())
1169                         Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert",
1170                                         fCurrentDetector.Data()));
1171
1172         } else {
1173                 Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. "
1174                                 "Aborted before with %s. Retry number %d.", fCurrentDetector.Data(),
1175                                 status->GetStatusName(), status->GetCount()));
1176                 Bool_t increaseCount = kTRUE;
1177                 if (status->GetStatus() == AliShuttleStatus::kDCSError || status->GetStatus() == AliShuttleStatus::kDCSStarted)
1178                         increaseCount = kFALSE;
1179                 UpdateShuttleStatus(AliShuttleStatus::kStarted, increaseCount);
1180                 cont = kTRUE;
1181         }
1182
1183         return cont;
1184 }
1185
1186 //______________________________________________________________________________________________
1187 Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
1188 {
1189         //
1190         // Makes data retrieval for all detectors in the configuration.
1191         // entry: Shuttle logbook entry, contains run paramenters and status of detectors
1192         // (Unprocessed, Inactive, Failed or Done).
1193         // Returns kFALSE in case of error occured and kTRUE otherwise
1194         //
1195
1196         if (!entry) return kFALSE;
1197
1198         fLogbookEntry = entry;
1199
1200         AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^* \n",
1201                                         GetCurrentRun()));
1202
1203         // create ML instance that monitors this run
1204         fMonaLisa = new TMonaLisaWriter(Form("%d", GetCurrentRun()), "SHUTTLE", "aliendb1.cern.ch");
1205         // disable monitoring of other parameters that come e.g. from TFile
1206         gMonitoringWriter = 0;
1207
1208         // Send the information to ML
1209         TMonaLisaText  mlStatus("SHUTTLE_status", "Processing");
1210         TMonaLisaText  mlRunType("SHUTTLE_runtype", Form("%s (%s)", entry->GetRunType(), entry->GetRunParameter("log")));
1211
1212         TList mlList;
1213         mlList.Add(&mlStatus);
1214         mlList.Add(&mlRunType);
1215
1216         fMonaLisa->SendParameters(&mlList);
1217
1218         if (fLogbookEntry->IsDone())
1219         {
1220                 Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook");
1221                 UpdateShuttleLogbook("shuttle_done");
1222                 fLogbookEntry = 0;
1223                 return kTRUE;
1224         }
1225
1226         // read test mode if flag is set
1227         if (fReadTestMode)
1228         {
1229                 fTestMode = kNone;
1230                 TString logEntry(entry->GetRunParameter("log"));
1231                 //printf("log entry = %s\n", logEntry.Data());
1232                 TString searchStr("Testmode: ");
1233                 Int_t pos = logEntry.Index(searchStr.Data());
1234                 //printf("%d\n", pos);
1235                 if (pos >= 0)
1236                 {
1237                         TSubString subStr = logEntry(pos + searchStr.Length(), logEntry.Length());
1238                         //printf("%s\n", subStr.String().Data());
1239                         TString newStr(subStr.Data());
1240                         TObjArray* token = newStr.Tokenize(' ');
1241                         if (token)
1242                         {
1243                                 //token->Print();
1244                                 TObjString* tmpStr = dynamic_cast<TObjString*> (token->First());
1245                                 if (tmpStr)
1246                                 {
1247                                         Int_t testMode = tmpStr->String().Atoi();
1248                                         if (testMode > 0)
1249                                         {
1250                                                 Log("SHUTTLE", Form("Enabling test mode %d", testMode));
1251                                                 SetTestMode((TestMode) testMode);
1252                                         }
1253                                 }
1254                                 delete token;          
1255                         }
1256                 }
1257         }
1258         
1259         Log("SHUTTLE", Form("The test mode flag is %d", (Int_t) fTestMode));
1260         
1261         fLogbookEntry->Print("all");
1262
1263         // Initialization
1264         Bool_t hasError = kFALSE;
1265
1266         AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
1267         if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
1268         AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
1269         if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
1270
1271         // Loop on detectors in the configuration
1272         TIter iter(fConfig->GetDetectors());
1273         TObjString* aDetector = 0;
1274
1275         while ((aDetector = (TObjString*) iter.Next()))
1276         {
1277                 fCurrentDetector = aDetector->String();
1278
1279                 if (ContinueProcessing() == kFALSE) continue;
1280
1281                 AliInfo(Form("\n\n \t\t\t****** run %d - %s: START  ******",
1282                                                 GetCurrentRun(), aDetector->GetName()));
1283
1284                 for(Int_t iSys=0;iSys<3;iSys++) fFXSCalled[iSys]=kFALSE;
1285
1286                 Log(fCurrentDetector.Data(), "Starting processing");
1287
1288                 Int_t pid = fork();
1289
1290                 if (pid < 0)
1291                 {
1292                         Log("SHUTTLE", "ERROR: Forking failed");
1293                 }
1294                 else if (pid > 0)
1295                 {
1296                         // parent
1297                         AliInfo(Form("In parent process of %d - %s: Starting monitoring",
1298                                                         GetCurrentRun(), aDetector->GetName()));
1299
1300                         Long_t begin = time(0);
1301
1302                         int status; // to be used with waitpid, on purpose an int (not Int_t)!
1303                         while (waitpid(pid, &status, WNOHANG) == 0)
1304                         {
1305                                 Long_t expiredTime = time(0) - begin;
1306
1307                                 if (expiredTime > fConfig->GetPPTimeOut())
1308                                 {
1309                                         TString tmp;
1310                                         tmp.Form("Process of %s time out. Run time: %d seconds. Killing...",
1311                                                                 fCurrentDetector.Data(), expiredTime);
1312                                         Log("SHUTTLE", tmp);
1313                                         Log(fCurrentDetector, tmp);
1314
1315                                         kill(pid, 9);
1316
1317                                         UpdateShuttleStatus(AliShuttleStatus::kPPTimeOut);
1318                                         hasError = kTRUE;
1319
1320                                         gSystem->Sleep(1000);
1321                                 }
1322                                 else
1323                                 {
1324                                         gSystem->Sleep(1000);
1325                                         
1326                                         TString checkStr;
1327                                         checkStr.Form("ps -o vsize --pid %d | tail -n 1", pid);
1328                                         FILE* pipe = gSystem->OpenPipe(checkStr, "r");
1329                                         if (!pipe)
1330                                         {
1331                                                 Log("SHUTTLE", Form("Error: Could not open pipe to %s", checkStr.Data()));
1332                                                 continue;
1333                                         }
1334                                                 
1335                                         char buffer[100];
1336                                         if (!fgets(buffer, 100, pipe))
1337                                         {
1338                                                 Log("SHUTTLE", "Error: ps did not return anything");
1339                                                 gSystem->ClosePipe(pipe);
1340                                                 continue;
1341                                         }
1342                                         gSystem->ClosePipe(pipe);
1343                                         
1344                                         //Log("SHUTTLE", Form("ps returned %s", buffer));
1345                                         
1346                                         Int_t mem = 0;
1347                                         if ((sscanf(buffer, "%d\n", &mem) != 1) || !mem)
1348                                         {
1349                                                 Log("SHUTTLE", "Error: Could not parse output of ps");
1350                                                 continue;
1351                                         }
1352                                         
1353                                         if (expiredTime % 60 == 0)
1354                                                 Log("SHUTTLE", Form("%s: Checking process. Run time: %d seconds - Memory consumption: %d KB",
1355                                                                 fCurrentDetector.Data(), expiredTime, mem));
1356                                         
1357                                         if (mem > fConfig->GetPPMaxMem())
1358                                         {
1359                                                 TString tmp;
1360                                                 tmp.Form("Process exceeds maximum allowed memory (%d KB > %d KB). Killing...",
1361                                                         mem, fConfig->GetPPMaxMem());
1362                                                 Log("SHUTTLE", tmp);
1363                                                 Log(fCurrentDetector, tmp);
1364         
1365                                                 kill(pid, 9);
1366         
1367                                                 UpdateShuttleStatus(AliShuttleStatus::kPPOutOfMemory);
1368                                                 hasError = kTRUE;
1369         
1370                                                 gSystem->Sleep(1000);
1371                                         }
1372                                 }
1373                         }
1374
1375                         AliInfo(Form("In parent process of %d - %s: Client has terminated.",
1376                                                                 GetCurrentRun(), aDetector->GetName()));
1377
1378                         if (WIFEXITED(status))
1379                         {
1380                                 Int_t returnCode = WEXITSTATUS(status);
1381
1382                                 Log("SHUTTLE", Form("%s: the return code is %d", fCurrentDetector.Data(),
1383                                                                                 returnCode));
1384
1385                                 if (returnCode == 0) hasError = kTRUE;
1386                         }
1387                 }
1388                 else if (pid == 0)
1389                 {
1390                         // client
1391                         AliInfo(Form("In client process of %d - %s", GetCurrentRun(), aDetector->GetName()));
1392
1393                         AliInfo("Redirecting output...");
1394
1395                         if ((freopen(GetLogFileName(fCurrentDetector), "a", stdout)) == 0)
1396                         {
1397                                 Log("SHUTTLE", "Could not freopen stdout");
1398                         }
1399                         else
1400                         {
1401                                 fOutputRedirected = kTRUE;
1402                                 if ((dup2(fileno(stdout), fileno(stderr))) < 0)
1403                                         Log("SHUTTLE", "Could not redirect stderr");
1404                                 
1405                         }
1406                         
1407                         Bool_t success = ProcessCurrentDetector();
1408                         if (success) // Preprocessor finished successfully!
1409                         { 
1410                                 // Update time_processed field in FXS DB
1411                                 if (UpdateTable() == kFALSE)
1412                                         Log("SHUTTLE", Form("Process - %s: Could not update FXS databases!"));
1413
1414                                 // Transfer the data from local storage to main storage (Grid)
1415                                 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
1416                                 if (StoreOCDB() == kFALSE)
1417                                 {
1418                                         AliInfo(Form("\n \t\t\t****** run %d - %s: STORAGE ERROR ****** \n\n",
1419                                                         GetCurrentRun(), aDetector->GetName()));
1420                                         UpdateShuttleStatus(AliShuttleStatus::kStoreError);
1421                                         success = kFALSE;
1422                                 } else {
1423                                         AliInfo(Form("\n \t\t\t****** run %d - %s: DONE ****** \n\n",
1424                                                         GetCurrentRun(), aDetector->GetName()));
1425                                         UpdateShuttleStatus(AliShuttleStatus::kDone);
1426                                         UpdateShuttleLogbook(fCurrentDetector, "DONE");
1427                                 }
1428                         }
1429
1430                         for (UInt_t iSys=0; iSys<3; iSys++)
1431                         {
1432                                 if (fFXSCalled[iSys]) fFXSlist[iSys].Clear();
1433                         }
1434
1435                         AliInfo(Form("Client process of %d - %s is exiting now with %d.",
1436                                                         GetCurrentRun(), aDetector->GetName(), success));
1437
1438                         // the client exits here
1439                         gSystem->Exit(success);
1440
1441                         AliError("We should never get here!!!");
1442                 }
1443         }
1444
1445         AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^* \n",
1446                                                         GetCurrentRun()));
1447
1448         //check if shuttle is done for this run, if so update logbook
1449         TObjArray checkEntryArray;
1450         checkEntryArray.SetOwner(1);
1451         TString whereClause = Form("where run=%d", GetCurrentRun());
1452         if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || checkEntryArray.GetEntries() == 0) {
1453                 Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!",
1454                                                 GetCurrentRun()));
1455                 return hasError == kFALSE;
1456         }
1457
1458         AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*>
1459                                                 (checkEntryArray.At(0));
1460
1461         if (checkEntry)
1462         {
1463                 if (checkEntry->IsDone())
1464                 {
1465                         Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook");
1466                         UpdateShuttleLogbook("shuttle_done");
1467                 }
1468                 else
1469                 {
1470                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1471                         {
1472                                 if (checkEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
1473                                 {
1474                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
1475                                                         checkEntry->GetRun(), GetDetName(iDet)));
1476                                         fFirstUnprocessed[iDet] = kFALSE;
1477                                 }
1478                         }
1479                 }
1480         }
1481
1482         // remove ML instance
1483         delete fMonaLisa;
1484         fMonaLisa = 0;
1485
1486         fLogbookEntry = 0;
1487
1488         return hasError == kFALSE;
1489 }
1490
1491 //______________________________________________________________________________________________
1492 Bool_t AliShuttle::ProcessCurrentDetector()
1493 {
1494         //
1495         // Makes data retrieval just for a specific detector (fCurrentDetector).
1496         // Threre should be a configuration for this detector.
1497
1498         AliInfo(Form("Retrieving values for %s, run %d", fCurrentDetector.Data(), GetCurrentRun()));
1499
1500         if (!CleanReferenceStorage(fCurrentDetector.Data()))
1501                 return kFALSE;
1502
1503         TMap* dcsMap = 0;
1504
1505         // call preprocessor
1506         AliPreprocessor* aPreprocessor =
1507                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
1508
1509         aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
1510
1511         Bool_t processDCS = aPreprocessor->ProcessDCS();
1512
1513         if (!processDCS)
1514         {
1515                 Log(fCurrentDetector, "The preprocessor requested to skip the retrieval of DCS values");
1516         }
1517         else if (fTestMode & kSkipDCS)
1518         {
1519                 Log(fCurrentDetector, "In TESTMODE - Skipping DCS processing!");
1520         } 
1521         else if (fTestMode & kErrorDCS)
1522         {
1523                 Log(fCurrentDetector, "In TESTMODE - Simulating DCS error");
1524                 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
1525                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1526                 return kFALSE;
1527         } else {
1528
1529                 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
1530
1531                 TString host(fConfig->GetDCSHost(fCurrentDetector));
1532                 Int_t port = fConfig->GetDCSPort(fCurrentDetector);
1533
1534                 if (fConfig->GetDCSAliases(fCurrentDetector)->GetEntries() > 0)
1535                 {
1536                         dcsMap = GetValueSet(host, port, fConfig->GetDCSAliases(fCurrentDetector), kAlias);
1537                         if (!dcsMap)
1538                         {
1539                                 Log(fCurrentDetector, "ProcessCurrentDetector - Error while retrieving DCS aliases");
1540                                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1541                                 return kFALSE;
1542                         }
1543                 }
1544                 
1545                 if (fConfig->GetDCSDataPoints(fCurrentDetector)->GetEntries() > 0)
1546                 {
1547                         TMap* dcsMap2 = GetValueSet(host, port, fConfig->GetDCSDataPoints(fCurrentDetector), kDP);
1548                         if (!dcsMap2)
1549                         {
1550                                 Log(fCurrentDetector, "ProcessCurrentDetector - Error while retrieving DCS data points");
1551                                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1552                                 if (dcsMap)
1553                                         delete dcsMap;
1554                                 return kFALSE;
1555                         }
1556                         
1557                         if (!dcsMap)
1558                         {
1559                                 dcsMap = dcsMap2;
1560                         }
1561                         else // merge
1562                         {
1563                                 TIter iter(dcsMap2);
1564                                 TObjString* key = 0;
1565                                 while ((key = (TObjString*) iter.Next()))
1566                                         dcsMap->Add(key, dcsMap2->GetValue(key->String()));
1567                                         
1568                                 dcsMap2->SetOwner(kFALSE);
1569                                 delete dcsMap2;
1570                         }
1571                 }
1572                 
1573         }
1574
1575         // still no map?
1576         if (!dcsMap)
1577                 dcsMap = new TMap;
1578         
1579         // DCS Archive DB processing successful. Call Preprocessor!
1580         UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
1581
1582         UInt_t returnValue = aPreprocessor->Process(dcsMap);
1583
1584         if (returnValue > 0) // Preprocessor error!
1585         {
1586                 Log(fCurrentDetector, Form("Preprocessor failed. Process returned %d.", returnValue));
1587                 UpdateShuttleStatus(AliShuttleStatus::kPPError);
1588                 dcsMap->DeleteAll();
1589                 delete dcsMap;
1590                 return kFALSE;
1591         }
1592         
1593         // preprocessor ok!
1594         UpdateShuttleStatus(AliShuttleStatus::kPPDone);
1595         Log(fCurrentDetector, Form("ProcessCurrentDetector - %s preprocessor returned success",
1596                                 fCurrentDetector.Data()));
1597
1598         dcsMap->DeleteAll();
1599         delete dcsMap;
1600
1601         return kTRUE;
1602 }
1603
1604 //______________________________________________________________________________________________
1605 Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
1606                 TObjArray& entries)
1607 {
1608         // Query DAQ's Shuttle logbook and fills detector status object.
1609         // Call QueryRunParameters to query DAQ logbook for run parameters.
1610         //
1611
1612         entries.SetOwner(1);
1613
1614         // check connection, in case connect
1615         if(!Connect(3)) return kFALSE;
1616
1617         TString sqlQuery;
1618         sqlQuery = Form("select * from %s %s order by run", fConfig->GetShuttlelbTable(), whereClause);
1619
1620         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
1621         if (!aResult) {
1622                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
1623                 return kFALSE;
1624         }
1625
1626         AliDebug(2,Form("Query = %s", sqlQuery.Data()));
1627
1628         if(aResult->GetRowCount() == 0) {
1629                 AliInfo("No entries in Shuttle Logbook match request");
1630                 delete aResult;
1631                 return kTRUE;
1632         }
1633
1634         // TODO Check field count!
1635         const UInt_t nCols = 23;
1636         if (aResult->GetFieldCount() != (Int_t) nCols) {
1637                 AliError("Invalid SQL result field number!");
1638                 delete aResult;
1639                 return kFALSE;
1640         }
1641
1642         TSQLRow* aRow;
1643         while ((aRow = aResult->Next())) {
1644                 TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
1645                 Int_t run = runString.Atoi();
1646
1647                 AliShuttleLogbookEntry *entry = QueryRunParameters(run);
1648                 if (!entry)
1649                         continue;
1650
1651                 // loop on detectors
1652                 for(UInt_t ii = 0; ii < nCols; ii++)
1653                         entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii));
1654
1655                 entries.AddLast(entry);
1656                 delete aRow;
1657         }
1658
1659         delete aResult;
1660         return kTRUE;
1661 }
1662
1663 //______________________________________________________________________________________________
1664 AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
1665 {
1666         //
1667         // Retrieve run parameters written in the DAQ logbook and sets them into AliShuttleLogbookEntry object
1668         //
1669
1670         // check connection, in case connect
1671         if (!Connect(3))
1672                 return 0;
1673
1674         TString sqlQuery;
1675         sqlQuery.Form("select * from %s where run=%d", fConfig->GetDAQlbTable(), run);
1676
1677         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
1678         if (!aResult) {
1679                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
1680                 return 0;
1681         }
1682
1683         if (aResult->GetRowCount() == 0) {
1684                 Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run));
1685                 delete aResult;
1686                 return 0;
1687         }
1688
1689         if (aResult->GetRowCount() > 1) {
1690                 AliError(Form("More than one entry in DAQ Logbook for run %d. Skipping", run));
1691                 delete aResult;
1692                 return 0;
1693         }
1694
1695         TSQLRow* aRow = aResult->Next();
1696         if (!aRow)
1697         {
1698                 AliError(Form("Could not retrieve row for run %d. Skipping", run));
1699                 delete aResult;
1700                 return 0;
1701         }
1702
1703         AliShuttleLogbookEntry* entry = new AliShuttleLogbookEntry(run);
1704
1705         for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
1706                 entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
1707
1708         UInt_t startTime = entry->GetStartTime();
1709         UInt_t endTime = entry->GetEndTime();
1710
1711         if (!startTime || !endTime || startTime > endTime) {
1712                 Log("SHUTTLE",
1713                         Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d",
1714                                 run, startTime, endTime));
1715                 delete entry;
1716                 delete aRow;
1717                 delete aResult;
1718                 return 0;
1719         }
1720
1721         delete aRow;
1722         delete aResult;
1723
1724         return entry;
1725 }
1726
1727 //______________________________________________________________________________________________
1728 Bool_t AliShuttle::GetValueSet(const char* host, Int_t port, const char* entry,
1729                                 TObjArray* valueSet, DCSType type)
1730 {
1731         // Retrieve all "entry" data points from the DCS server
1732         // host, port: TSocket connection parameters
1733         // entry: name of the alias or data point
1734         // valueSet: array of retrieved AliDCSValue's
1735         // type: kAlias or kDP
1736
1737         AliDCSClient client(host, port, fTimeout, fRetries);
1738         if (!client.IsConnected())
1739         {
1740                 return kFALSE;
1741         }
1742
1743         Int_t result=0;
1744
1745         if (type == kAlias)
1746         {
1747                 result = client.GetAliasValues(entry,
1748                         GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1749         } else
1750         if (type == kDP)
1751         {
1752                 result = client.GetDPValues(entry,
1753                         GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1754         }
1755
1756         if (result < 0)
1757         {
1758                 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get '%s'! Reason: %s",
1759                         entry, AliDCSClient::GetErrorString(result)));
1760
1761                 if (result == AliDCSClient::fgkServerError)
1762                 {
1763                         Log(fCurrentDetector.Data(), Form("GetValueSet - Server error: %s",
1764                                 client.GetServerError().Data()));
1765                 }
1766
1767                 return kFALSE;
1768         }
1769
1770         return kTRUE;
1771 }
1772
1773 //______________________________________________________________________________________________
1774 TMap* AliShuttle::GetValueSet(const char* host, Int_t port, const TSeqCollection* entries,
1775                               DCSType type)
1776 {
1777         // Retrieve all "entry" data points from the DCS server
1778         // host, port: TSocket connection parameters
1779         // entries: list of name of the alias or data point
1780         // type: kAlias or kDP
1781         // returns TMap of values, 0 when failure
1782
1783         const Int_t kSplit = 100; // maximum number of DPs at a time
1784         
1785         Int_t totalEntries = entries->GetEntries();
1786         
1787         TMap* result = 0;
1788         
1789         for (Int_t index=0; index < totalEntries; index += kSplit)
1790         {
1791                 Int_t endIndex = index + kSplit;
1792         
1793                 AliDCSClient client(host, port, fTimeout, fRetries);
1794                 if (!client.IsConnected())
1795                         return 0;
1796
1797                 TMap* partialResult = 0;
1798
1799                 if (type == kAlias)
1800                 {
1801                         partialResult = client.GetAliasValues(entries, GetCurrentStartTime(), 
1802                                 GetCurrentEndTime(), index, endIndex);
1803                 } 
1804                 else if (type == kDP)
1805                 {
1806                         partialResult = client.GetDPValues(entries, GetCurrentStartTime(), 
1807                                 GetCurrentEndTime(), index, endIndex);
1808                 }
1809
1810                 if (partialResult == 0)
1811                 {
1812                         Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get entries (%d...%d)! Reason: %s",
1813                                 index, endIndex, client.GetServerError().Data()));
1814         
1815                         if (result)
1816                                 delete result;
1817                                 
1818                         return 0;
1819                 }
1820                 
1821                 AliInfo(Form("Retrieved entries %d..%d (total %d); E.g. %s has %d values collected",
1822                                         index, endIndex, totalEntries, entries->At(index)->GetName(), ((TObjArray*)
1823                                         partialResult->GetValue(entries->At(index)->GetName()))->GetEntriesFast()));
1824                 
1825                 if (!result)
1826                 {
1827                         result = partialResult;
1828                 }
1829                 else
1830                 {               
1831                         TIter iter(partialResult);
1832                         TObjString* key = 0;
1833                         while ((key = (TObjString*) iter.Next()))
1834                                 result->Add(key, partialResult->GetValue(key->String()));
1835                                 
1836                         partialResult->SetOwner(kFALSE);
1837                         delete partialResult;
1838                 }
1839         
1840         }
1841
1842         return result;
1843 }
1844 //______________________________________________________________________________________________
1845 const char* AliShuttle::GetFile(Int_t system, const char* detector,
1846                 const char* id, const char* source)
1847 {
1848         // Get calibration file from file exchange servers
1849         // First queris the FXS database for the file name, using the run, detector, id and source info
1850         // then calls RetrieveFile(filename) for actual copy to local disk
1851         // run: current run being processed (given by Logbook entry fLogbookEntry)
1852         // detector: the Preprocessor name
1853         // id: provided as a parameter by the Preprocessor
1854         // source: provided by the Preprocessor through GetFileSources function
1855
1856         // check if test mode should simulate a FXS error
1857         if (fTestMode & kErrorFXSFiles)
1858         {
1859                 Log(detector, Form("GetFile - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
1860                 return 0;
1861         }
1862         
1863         // check connection, in case connect
1864         if (!Connect(system))
1865         {
1866                 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
1867                 return 0;
1868         }
1869
1870         // Query preparation
1871         TString sourceName(source);
1872         Int_t nFields = 3;
1873         TString sqlQueryStart = Form("select filePath,size,fileChecksum from %s where",
1874                                                                 fConfig->GetFXSdbTable(system));
1875         TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1876                                                                 GetCurrentRun(), detector, id);
1877
1878         if (system == kDAQ)
1879         {
1880                 whereClause += Form(" and DAQsource=\"%s\"", source);
1881         }
1882         else if (system == kDCS)
1883         {
1884                 sourceName="none";
1885         }
1886         else if (system == kHLT)
1887         {
1888                 whereClause += Form(" and DDLnumbers=\"%s\"", source);
1889                 nFields = 3;
1890         }
1891
1892         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1893
1894         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1895
1896         // Query execution
1897         TSQLResult* aResult = 0;
1898         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
1899         if (!aResult) {
1900                 Log(detector, Form("GetFileName - Can't execute SQL query to %s database for: id = %s, source = %s",
1901                                 GetSystemName(system), id, sourceName.Data()));
1902                 return 0;
1903         }
1904
1905         if(aResult->GetRowCount() == 0)
1906         {
1907                 Log(detector,
1908                         Form("GetFileName - No entry in %s FXS db for: id = %s, source = %s",
1909                                 GetSystemName(system), id, sourceName.Data()));
1910                 delete aResult;
1911                 return 0;
1912         }
1913
1914         if (aResult->GetRowCount() > 1) {
1915                 Log(detector,
1916                         Form("GetFileName - More than one entry in %s FXS db for: id = %s, source = %s",
1917                                 GetSystemName(system), id, sourceName.Data()));
1918                 delete aResult;
1919                 return 0;
1920         }
1921
1922         if (aResult->GetFieldCount() != nFields) {
1923                 Log(detector,
1924                         Form("GetFileName - Wrong field count in %s FXS db for: id = %s, source = %s",
1925                                 GetSystemName(system), id, sourceName.Data()));
1926                 delete aResult;
1927                 return 0;
1928         }
1929
1930         TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
1931
1932         if (!aRow){
1933                 Log(detector, Form("GetFileName - Empty set result in %s FXS db from query: id = %s, source = %s",
1934                                 GetSystemName(system), id, sourceName.Data()));
1935                 delete aResult;
1936                 return 0;
1937         }
1938
1939         TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
1940         TString fileSize(aRow->GetField(1), aRow->GetFieldLength(1));
1941         TString fileChecksum(aRow->GetField(2), aRow->GetFieldLength(2));
1942
1943         delete aResult;
1944         delete aRow;
1945
1946         AliDebug(2, Form("filePath = %s; size = %s, fileChecksum = %s",
1947                                 filePath.Data(), fileSize.Data(), fileChecksum.Data()));
1948
1949         // retrieved file is renamed to make it unique
1950         TString localFileName = Form("%s_%s_%d_%s_%s.shuttle",
1951                                         GetSystemName(system), detector, GetCurrentRun(), id, sourceName.Data());
1952
1953
1954         // file retrieval from FXS
1955         UInt_t nRetries = 0;
1956         UInt_t maxRetries = 3;
1957         Bool_t result = kFALSE;
1958
1959         // copy!! if successful TSystem::Exec returns 0
1960         while(nRetries++ < maxRetries) {
1961                 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
1962                 result = RetrieveFile(system, filePath.Data(), localFileName.Data());
1963                 if(!result)
1964                 {
1965                         Log(detector, Form("GetFileName - Copy of file %s from %s FXS failed",
1966                                         filePath.Data(), GetSystemName(system)));
1967                         continue;
1968                 } else {
1969                         AliInfo(Form("File %s copied from %s FXS into %s/%s",
1970                                                 filePath.Data(), GetSystemName(system),
1971                                                 GetShuttleTempDir(), localFileName.Data()));
1972                 }
1973
1974                 if (fileChecksum.Length()>0)
1975                 {
1976                         // compare md5sum of local file with the one stored in the FXS DB
1977                         Int_t md5Comp = gSystem->Exec(Form("md5sum %s/%s |grep %s 2>&1 > /dev/null",
1978                                                 GetShuttleTempDir(), localFileName.Data(), fileChecksum.Data()));
1979
1980                         if (md5Comp != 0)
1981                         {
1982                                 Log(detector, Form("GetFileName - md5sum of file %s does not match with local copy!",
1983                                                         filePath.Data()));
1984                                 result = kFALSE;
1985                                 continue;
1986                         }
1987                 } else {
1988                         Log(fCurrentDetector, Form("GetFile - md5sum of file %s not set in %s database, skipping comparison",
1989                                                         filePath.Data(), GetSystemName(system)));
1990                 }
1991                 if (result) break;
1992         }
1993
1994         if(!result) return 0;
1995
1996         fFXSCalled[system]=kTRUE;
1997         TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, sourceName.Data()));
1998         fFXSlist[system].Add(fileParams);
1999
2000         static TString fullLocalFileName;
2001         fullLocalFileName = TString::Format("%s/%s", GetShuttleTempDir(), localFileName.Data());
2002
2003         AliInfo(Form("fullLocalFileName = %s", fullLocalFileName.Data()));
2004
2005         return fullLocalFileName.Data();
2006
2007 }
2008
2009 //______________________________________________________________________________________________
2010 Bool_t AliShuttle::RetrieveFile(UInt_t system, const char* fxsFileName, const char* localFileName)
2011 {
2012         //
2013         // Copies file from FXS to local Shuttle machine
2014         //
2015
2016         // check temp directory: trying to cd to temp; if it does not exist, create it
2017         AliDebug(2, Form("Copy file %s from %s FXS into %s/%s",
2018                         GetSystemName(system), fxsFileName, GetShuttleTempDir(), localFileName));
2019
2020         void* dir = gSystem->OpenDirectory(GetShuttleTempDir());
2021         if (dir == NULL) {
2022                 if (gSystem->mkdir(GetShuttleTempDir(), kTRUE)) {
2023                         AliError(Form("Can't open directory <%s>", GetShuttleTempDir()));
2024                         return kFALSE;
2025                 }
2026
2027         } else {
2028                 gSystem->FreeDirectory(dir);
2029         }
2030
2031         TString baseFXSFolder;
2032         if (system == kDAQ)
2033         {
2034                 baseFXSFolder = "FES/";
2035         }
2036         else if (system == kDCS)
2037         {
2038                 baseFXSFolder = "";
2039         }
2040         else if (system == kHLT)
2041         {
2042                 baseFXSFolder = "/opt/FXS";
2043         }
2044
2045
2046         TString command = Form("scp -oPort=%d -2 %s@%s:%s%s %s/%s",
2047                 fConfig->GetFXSPort(system),
2048                 fConfig->GetFXSUser(system),
2049                 fConfig->GetFXSHost(system),
2050                 baseFXSFolder.Data(),
2051                 fxsFileName,
2052                 GetShuttleTempDir(),
2053                 localFileName);
2054
2055         AliDebug(2, Form("%s",command.Data()));
2056
2057         Bool_t result = (gSystem->Exec(command.Data()) == 0);
2058
2059         return result;
2060 }
2061
2062 //______________________________________________________________________________________________
2063 TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
2064 {
2065         //
2066         // Get sources producing the condition file Id from file exchange servers
2067         // if id is NULL all sources are returned (distinct)
2068         //
2069         
2070         // check if test mode should simulate a FXS error
2071         if (fTestMode & kErrorFXSSources)
2072         {
2073                 Log(detector, Form("GetFileSources - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2074                 return 0;
2075         }
2076
2077
2078         if (system == kDCS)
2079         {
2080                 AliWarning("DCS system has only one source of data!");
2081                 TList *list = new TList();
2082                 list->SetOwner(1);
2083                 list->Add(new TObjString(" "));
2084                 return list;
2085         }
2086
2087         // check connection, in case connect
2088         if (!Connect(system))
2089         {
2090                 Log(detector, Form("GetFileSources - Couldn't connect to %s FXS database", GetSystemName(system)));
2091                 return NULL;
2092         }
2093
2094         TString sourceName = 0;
2095         if (system == kDAQ)
2096         {
2097                 sourceName = "DAQsource";
2098         } else if (system == kHLT)
2099         {
2100                 sourceName = "DDLnumbers";
2101         }
2102
2103         TString sqlQueryStart = Form("select distinct %s from %s where", sourceName.Data(), fConfig->GetFXSdbTable(system));
2104         TString whereClause = Form("run=%d and detector=\"%s\"",
2105                                 GetCurrentRun(), detector);
2106         if (id)
2107                 whereClause += Form(" and fileId=\"%s\"", id);
2108         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2109
2110         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2111
2112         // Query execution
2113         TSQLResult* aResult;
2114         aResult = fServer[system]->Query(sqlQuery);
2115         if (!aResult) {
2116                 Log(detector, Form("GetFileSources - Can't execute SQL query to %s database for id: %s",
2117                                 GetSystemName(system), id));
2118                 return 0;
2119         }
2120
2121         TList *list = new TList();
2122         list->SetOwner(1);
2123         
2124         if (aResult->GetRowCount() == 0)
2125         {
2126                 Log(detector,
2127                         Form("GetFileSources - No entry in %s FXS table for id: %s", GetSystemName(system), id));
2128                 delete aResult;
2129                 return list;
2130         }
2131
2132         TSQLRow* aRow;
2133
2134         while ((aRow = aResult->Next()))
2135         {
2136
2137                 TString source(aRow->GetField(0), aRow->GetFieldLength(0));
2138                 AliDebug(2, Form("%s = %s", sourceName.Data(), source.Data()));
2139                 list->Add(new TObjString(source));
2140                 delete aRow;
2141         }
2142
2143         delete aResult;
2144
2145         return list;
2146 }
2147
2148 //______________________________________________________________________________________________
2149 TList* AliShuttle::GetFileIDs(Int_t system, const char* detector, const char* source)
2150 {
2151         //
2152         // Get all ids of condition files produced by a given source from file exchange servers
2153         //
2154         
2155         // check if test mode should simulate a FXS error
2156         if (fTestMode & kErrorFXSSources)
2157         {
2158                 Log(detector, Form("GetFileIDs - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2159                 return 0;
2160         }
2161
2162         // check connection, in case connect
2163         if (!Connect(system))
2164         {
2165                 Log(detector, Form("GetFileIDs - Couldn't connect to %s FXS database", GetSystemName(system)));
2166                 return NULL;
2167         }
2168
2169         TString sourceName = 0;
2170         if (system == kDAQ)
2171         {
2172                 sourceName = "DAQsource";
2173         } else if (system == kHLT)
2174         {
2175                 sourceName = "DDLnumbers";
2176         }
2177
2178         TString sqlQueryStart = Form("select fileId from %s where", fConfig->GetFXSdbTable(system));
2179         TString whereClause = Form("run=%d and detector=\"%s\"",
2180                                 GetCurrentRun(), detector);
2181         if (sourceName.Length() > 0 && source)
2182                 whereClause += Form(" and %s=\"%s\"", sourceName.Data(), source);
2183         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2184
2185         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2186
2187         // Query execution
2188         TSQLResult* aResult;
2189         aResult = fServer[system]->Query(sqlQuery);
2190         if (!aResult) {
2191                 Log(detector, Form("GetFileIDs - Can't execute SQL query to %s database for source: %s",
2192                                 GetSystemName(system), source));
2193                 return 0;
2194         }
2195
2196         TList *list = new TList();
2197         list->SetOwner(1);
2198         
2199         if (aResult->GetRowCount() == 0)
2200         {
2201                 Log(detector,
2202                         Form("GetFileIDs - No entry in %s FXS table for source: %s", GetSystemName(system), source));
2203                 delete aResult;
2204                 return list;
2205         }
2206
2207         TSQLRow* aRow;
2208
2209         while ((aRow = aResult->Next()))
2210         {
2211
2212                 TString id(aRow->GetField(0), aRow->GetFieldLength(0));
2213                 AliDebug(2, Form("fileId = %s", id.Data()));
2214                 list->Add(new TObjString(id));
2215                 delete aRow;
2216         }
2217
2218         delete aResult;
2219
2220         return list;
2221 }
2222
2223 //______________________________________________________________________________________________
2224 Bool_t AliShuttle::Connect(Int_t system)
2225 {
2226         // Connect to MySQL Server of the system's FXS MySQL databases
2227         // DAQ Logbook, Shuttle Logbook and DAQ FXS db are on the same host
2228         //
2229
2230         // check connection: if already connected return
2231         if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
2232
2233         TString dbHost, dbUser, dbPass, dbName;
2234
2235         if (system < 3) // FXS db servers
2236         {
2237                 dbHost = Form("mysql://%s:%d", fConfig->GetFXSdbHost(system), fConfig->GetFXSdbPort(system));
2238                 dbUser = fConfig->GetFXSdbUser(system);
2239                 dbPass = fConfig->GetFXSdbPass(system);
2240                 dbName =   fConfig->GetFXSdbName(system);
2241         } else { // Run & Shuttle logbook servers
2242         // TODO Will the Shuttle logbook server be the same as the Run logbook server ???
2243                 dbHost = Form("mysql://%s:%d", fConfig->GetDAQlbHost(), fConfig->GetDAQlbPort());
2244                 dbUser = fConfig->GetDAQlbUser();
2245                 dbPass = fConfig->GetDAQlbPass();
2246                 dbName =   fConfig->GetDAQlbDB();
2247         }
2248
2249         fServer[system] = TSQLServer::Connect(dbHost.Data(), dbUser.Data(), dbPass.Data());
2250         if (!fServer[system] || !fServer[system]->IsConnected()) {
2251                 if(system < 3)
2252                 {
2253                 AliError(Form("Can't establish connection to FXS database for %s",
2254                                         AliShuttleInterface::GetSystemName(system)));
2255                 } else {
2256                 AliError("Can't establish connection to Run logbook.");
2257                 }
2258                 if(fServer[system]) delete fServer[system];
2259                 return kFALSE;
2260         }
2261
2262         // Get tables
2263         TSQLResult* aResult=0;
2264         switch(system){
2265                 case kDAQ:
2266                         aResult = fServer[kDAQ]->GetTables(dbName.Data());
2267                         break;
2268                 case kDCS:
2269                         aResult = fServer[kDCS]->GetTables(dbName.Data());
2270                         break;
2271                 case kHLT:
2272                         aResult = fServer[kHLT]->GetTables(dbName.Data());
2273                         break;
2274                 default:
2275                         aResult = fServer[3]->GetTables(dbName.Data());
2276                         break;
2277         }
2278
2279         delete aResult;
2280         return kTRUE;
2281 }
2282
2283 //______________________________________________________________________________________________
2284 Bool_t AliShuttle::UpdateTable()
2285 {
2286         //
2287         // Update FXS table filling time_processed field in all rows corresponding to current run and detector
2288         //
2289
2290         Bool_t result = kTRUE;
2291
2292         for (UInt_t system=0; system<3; system++)
2293         {
2294                 if(!fFXSCalled[system]) continue;
2295
2296                 // check connection, in case connect
2297                 if (!Connect(system))
2298                 {
2299                         Log(fCurrentDetector, Form("UpdateTable - Couldn't connect to %s FXS database", GetSystemName(system)));
2300                         result = kFALSE;
2301                         continue;
2302                 }
2303
2304                 TTimeStamp now; // now
2305
2306                 // Loop on FXS list entries
2307                 TIter iter(&fFXSlist[system]);
2308                 TObjString *aFXSentry=0;
2309                 while ((aFXSentry = dynamic_cast<TObjString*> (iter.Next())))
2310                 {
2311                         TString aFXSentrystr = aFXSentry->String();
2312                         TObjArray *aFXSarray = aFXSentrystr.Tokenize("#!?!#");
2313                         if (!aFXSarray || aFXSarray->GetEntries() != 2 )
2314                         {
2315                                 Log(fCurrentDetector, Form("UpdateTable - error updating %s FXS entry. Check string: <%s>",
2316                                         GetSystemName(system), aFXSentrystr.Data()));
2317                                 if(aFXSarray) delete aFXSarray;
2318                                 result = kFALSE;
2319                                 continue;
2320                         }
2321                         const char* fileId = ((TObjString*) aFXSarray->At(0))->GetName();
2322                         const char* source = ((TObjString*) aFXSarray->At(1))->GetName();
2323
2324                         TString whereClause;
2325                         if (system == kDAQ)
2326                         {
2327                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
2328                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
2329                         }
2330                         else if (system == kDCS)
2331                         {
2332                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\";",
2333                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId);
2334                         }
2335                         else if (system == kHLT)
2336                         {
2337                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\";",
2338                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
2339                         }
2340
2341                         delete aFXSarray;
2342
2343                         TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
2344                                                                 now.GetSec(), whereClause.Data());
2345
2346                         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2347
2348                         // Query execution
2349                         TSQLResult* aResult;
2350                         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2351                         if (!aResult)
2352                         {
2353                                 Log(fCurrentDetector, Form("UpdateTable - %s db: can't execute SQL query <%s>",
2354                                                                 GetSystemName(system), sqlQuery.Data()));
2355                                 result = kFALSE;
2356                                 continue;
2357                         }
2358                         delete aResult;
2359                 }
2360         }
2361
2362         return result;
2363 }
2364
2365 //______________________________________________________________________________________________
2366 Bool_t AliShuttle::UpdateTableFailCase()
2367 {
2368         // Update FXS table filling time_processed field in all rows corresponding to current run and detector
2369         // this is called in case the preprocessor is declared failed for the current run, because
2370         // the fields are updated only in case of success
2371
2372         Bool_t result = kTRUE;
2373
2374         for (UInt_t system=0; system<3; system++)
2375         {
2376                 // check connection, in case connect
2377                 if (!Connect(system))
2378                 {
2379                         Log(fCurrentDetector, Form("UpdateTableFailCase - Couldn't connect to %s FXS database",
2380                                                         GetSystemName(system)));
2381                         result = kFALSE;
2382                         continue;
2383                 }
2384
2385                 TTimeStamp now; // now
2386
2387                 // Loop on FXS list entries
2388
2389                 TString whereClause = Form("where run=%d and detector=\"%s\";",
2390                                                 GetCurrentRun(), fCurrentDetector.Data());
2391
2392
2393                 TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
2394                                                         now.GetSec(), whereClause.Data());
2395
2396                 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2397
2398                 // Query execution
2399                 TSQLResult* aResult;
2400                 aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2401                 if (!aResult)
2402                 {
2403                         Log(fCurrentDetector, Form("UpdateTableFailCase - %s db: can't execute SQL query <%s>",
2404                                                         GetSystemName(system), sqlQuery.Data()));
2405                         result = kFALSE;
2406                         continue;
2407                 }
2408                 delete aResult;
2409         }
2410
2411         return result;
2412 }
2413
2414 //______________________________________________________________________________________________
2415 Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
2416 {
2417         //
2418         // Update Shuttle logbook filling detector or shuttle_done column
2419         // ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
2420         //
2421
2422         // check connection, in case connect
2423         if(!Connect(3)){
2424                 Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook.");
2425                 return kFALSE;
2426         }
2427
2428         TString detName(detector);
2429         TString setClause;
2430         if(detName == "shuttle_done")
2431         {
2432                 setClause = "set shuttle_done=1";
2433
2434                 // Send the information to ML
2435                 TMonaLisaText  mlStatus("SHUTTLE_status", "Done");
2436
2437                 TList mlList;
2438                 mlList.Add(&mlStatus);
2439
2440                 fMonaLisa->SendParameters(&mlList);
2441         } else {
2442                 TString statusStr(status);
2443                 if(statusStr.Contains("done", TString::kIgnoreCase) ||
2444                    statusStr.Contains("failed", TString::kIgnoreCase)){
2445                         setClause = Form("set %s=\"%s\"", detector, status);
2446                 } else {
2447                         Log("SHUTTLE",
2448                                 Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s",
2449                                         status, detector));
2450                         return kFALSE;
2451                 }
2452         }
2453
2454         TString whereClause = Form("where run=%d", GetCurrentRun());
2455
2456         TString sqlQuery = Form("update %s %s %s",
2457                                         fConfig->GetShuttlelbTable(), setClause.Data(), whereClause.Data());
2458
2459         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2460
2461         // Query execution
2462         TSQLResult* aResult;
2463         aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
2464         if (!aResult) {
2465                 Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data()));
2466                 return kFALSE;
2467         }
2468         delete aResult;
2469
2470         return kTRUE;
2471 }
2472
2473 //______________________________________________________________________________________________
2474 Int_t AliShuttle::GetCurrentRun() const
2475 {
2476         //
2477         // Get current run from logbook entry
2478         //
2479
2480         return fLogbookEntry ? fLogbookEntry->GetRun() : -1;
2481 }
2482
2483 //______________________________________________________________________________________________
2484 UInt_t AliShuttle::GetCurrentStartTime() const
2485 {
2486         //
2487         // get current start time
2488         //
2489
2490         return fLogbookEntry ? fLogbookEntry->GetStartTime() : 0;
2491 }
2492
2493 //______________________________________________________________________________________________
2494 UInt_t AliShuttle::GetCurrentEndTime() const
2495 {
2496         //
2497         // get current end time from logbook entry
2498         //
2499
2500         return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
2501 }
2502
2503 //______________________________________________________________________________________________
2504 void AliShuttle::Log(const char* detector, const char* message)
2505 {
2506         //
2507         // Fill log string with a message
2508         //
2509
2510         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
2511         if (dir == NULL) {
2512                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE)) {
2513                         AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
2514                         return;
2515                 }
2516
2517         } else {
2518                 gSystem->FreeDirectory(dir);
2519         }
2520
2521         TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
2522         if (GetCurrentRun() >= 0) 
2523                 toLog += Form("run %d - ", GetCurrentRun());
2524         toLog += Form("%s", message);
2525
2526         AliInfo(toLog.Data());
2527         
2528         // if we redirect the log output already to the file, leave here
2529         if (fOutputRedirected && strcmp(detector, "SHUTTLE") != 0)
2530                 return;
2531
2532         TString fileName = GetLogFileName(detector);
2533         
2534         gSystem->ExpandPathName(fileName);
2535
2536         ofstream logFile;
2537         logFile.open(fileName, ofstream::out | ofstream::app);
2538
2539         if (!logFile.is_open()) {
2540                 AliError(Form("Could not open file %s", fileName.Data()));
2541                 return;
2542         }
2543
2544         logFile << toLog.Data() << "\n";
2545
2546         logFile.close();
2547 }
2548
2549 //______________________________________________________________________________________________
2550 TString AliShuttle::GetLogFileName(const char* detector) const
2551 {
2552         // 
2553         // returns the name of the log file for a given sub detector
2554         //
2555         
2556         TString fileName;
2557         
2558         if (GetCurrentRun() >= 0) 
2559                 fileName.Form("%s/%s_%d.log", GetShuttleLogDir(), detector, GetCurrentRun());
2560         else
2561                 fileName.Form("%s/%s.log", GetShuttleLogDir(), detector);
2562
2563         return fileName;
2564 }
2565
2566 //______________________________________________________________________________________________
2567 Bool_t AliShuttle::Collect(Int_t run)
2568 {
2569         //
2570         // Collects conditions data for all UNPROCESSED run written to DAQ LogBook in case of run = -1 (default)
2571         // If a dedicated run is given this run is processed
2572         //
2573         // In operational mode, this is the Shuttle function triggered by the EOR signal.
2574         //
2575
2576         if (run == -1)
2577                 Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs");
2578         else
2579                 Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run));
2580
2581         SetLastAction("Starting");
2582
2583         TString whereClause("where shuttle_done=0");
2584         if (run != -1)
2585                 whereClause += Form(" and run=%d", run);
2586
2587         TObjArray shuttleLogbookEntries;
2588         if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries))
2589         {
2590                 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
2591                 return kFALSE;
2592         }
2593
2594         if (shuttleLogbookEntries.GetEntries() == 0)
2595         {
2596                 if (run == -1)
2597                         Log("SHUTTLE","Collect - Found no UNPROCESSED runs in Shuttle logbook");
2598                 else
2599                         Log("SHUTTLE", Form("Collect - Run %d is already DONE "
2600                                                 "or it does not exist in Shuttle logbook", run));
2601                 return kTRUE;
2602         }
2603
2604         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
2605                 fFirstUnprocessed[iDet] = kTRUE;
2606
2607         if (run != -1)
2608         {
2609                 // query Shuttle logbook for earlier runs, check if some detectors are unprocessed,
2610                 // flag them into fFirstUnprocessed array
2611                 TString whereClause(Form("where shuttle_done=0 and run < %d", run));
2612                 TObjArray tmpLogbookEntries;
2613                 if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries))
2614                 {
2615                         Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
2616                         return kFALSE;
2617                 }
2618
2619                 TIter iter(&tmpLogbookEntries);
2620                 AliShuttleLogbookEntry* anEntry = 0;
2621                 while ((anEntry = dynamic_cast<AliShuttleLogbookEntry*> (iter.Next())))
2622                 {
2623                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
2624                         {
2625                                 if (anEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
2626                                 {
2627                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
2628                                                         anEntry->GetRun(), GetDetName(iDet)));
2629                                         fFirstUnprocessed[iDet] = kFALSE;
2630                                 }
2631                         }
2632
2633                 }
2634
2635         }
2636
2637         if (!RetrieveConditionsData(shuttleLogbookEntries))
2638         {
2639                 Log("SHUTTLE", "Collect - Process of at least one run failed");
2640                 return kFALSE;
2641         }
2642
2643         Log("SHUTTLE", "Collect - Requested run(s) successfully processed");
2644         return kTRUE;
2645 }
2646
2647 //______________________________________________________________________________________________
2648 Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries)
2649 {
2650         //
2651         // Retrieve conditions data for all runs that aren't processed yet
2652         //
2653
2654         Bool_t hasError = kFALSE;
2655
2656         TIter iter(&dateEntries);
2657         AliShuttleLogbookEntry* anEntry;
2658
2659         while ((anEntry = (AliShuttleLogbookEntry*) iter.Next())){
2660                 if (!Process(anEntry)){
2661                         hasError = kTRUE;
2662                 }
2663
2664                 // clean SHUTTLE temp directory
2665                 TString filename = Form("%s/*.shuttle", GetShuttleTempDir());
2666                 RemoveFile(filename.Data());
2667         }
2668
2669         return hasError == kFALSE;
2670 }
2671
2672 //______________________________________________________________________________________________
2673 ULong_t AliShuttle::GetTimeOfLastAction() const
2674 {
2675         //
2676         // Gets time of last action
2677         //
2678
2679         ULong_t tmp;
2680
2681         fMonitoringMutex->Lock();
2682
2683         tmp = fLastActionTime;
2684
2685         fMonitoringMutex->UnLock();
2686
2687         return tmp;
2688 }
2689
2690 //______________________________________________________________________________________________
2691 const TString AliShuttle::GetLastAction() const
2692 {
2693         //
2694         // returns a string description of the last action
2695         //
2696
2697         TString tmp;
2698
2699         fMonitoringMutex->Lock();
2700         
2701         tmp = fLastAction;
2702         
2703         fMonitoringMutex->UnLock();
2704
2705         return tmp;
2706 }
2707
2708 //______________________________________________________________________________________________
2709 void AliShuttle::SetLastAction(const char* action)
2710 {
2711         //
2712         // updates the monitoring variables
2713         //
2714
2715         fMonitoringMutex->Lock();
2716
2717         fLastAction = action;
2718         fLastActionTime = time(0);
2719         
2720         fMonitoringMutex->UnLock();
2721 }
2722
2723 //______________________________________________________________________________________________
2724 const char* AliShuttle::GetRunParameter(const char* param)
2725 {
2726         //
2727         // returns run parameter read from DAQ logbook
2728         //
2729
2730         if(!fLogbookEntry) {
2731                 AliError("No logbook entry!");
2732                 return 0;
2733         }
2734
2735         return fLogbookEntry->GetRunParameter(param);
2736 }
2737
2738 //______________________________________________________________________________________________
2739 AliCDBEntry* AliShuttle::GetFromOCDB(const char* detector, const AliCDBPath& path)
2740 {
2741         //
2742         // returns object from OCDB valid for current run
2743         //
2744
2745         if (fTestMode & kErrorOCDB)
2746         {
2747                 Log(detector, "GetFromOCDB - In TESTMODE - Simulating error with OCDB");
2748                 return 0;
2749         }
2750         
2751         AliCDBStorage *sto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
2752         if (!sto)
2753         {
2754                 Log(detector, "GetFromOCDB - Cannot activate main OCDB for query!");
2755                 return 0;
2756         }
2757
2758         return dynamic_cast<AliCDBEntry*> (sto->Get(path, GetCurrentRun()));
2759 }
2760
2761 //______________________________________________________________________________________________
2762 Bool_t AliShuttle::SendMail()
2763 {
2764         //
2765         // sends a mail to the subdetector expert in case of preprocessor error
2766         //
2767         
2768         if (fTestMode != kNone)
2769                 return kTRUE;
2770
2771         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
2772         if (dir == NULL)
2773         {
2774                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
2775                 {
2776                         AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
2777                         return kFALSE;
2778                 }
2779
2780         } else {
2781                 gSystem->FreeDirectory(dir);
2782         }
2783
2784         TString bodyFileName;
2785         bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
2786         gSystem->ExpandPathName(bodyFileName);
2787
2788         ofstream mailBody;
2789         mailBody.open(bodyFileName, ofstream::out);
2790
2791         if (!mailBody.is_open())
2792         {
2793                 AliError(Form("Could not open mail body file %s", bodyFileName.Data()));
2794                 return kFALSE;
2795         }
2796
2797         TString to="";
2798         TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
2799         TObjString *anExpert=0;
2800         while ((anExpert = (TObjString*) iterExperts.Next()))
2801         {
2802                 to += Form("%s,", anExpert->GetName());
2803         }
2804         to.Remove(to.Length()-1);
2805         AliDebug(2, Form("to: %s",to.Data()));
2806
2807         if (to.IsNull()) {
2808                 AliInfo("List of detector responsibles not yet set!");
2809                 return kFALSE;
2810         }
2811
2812         TString cc="alberto.colla@cern.ch";
2813
2814         TString subject = Form("%s Shuttle preprocessor FAILED in run %d !",
2815                                 fCurrentDetector.Data(), GetCurrentRun());
2816         AliDebug(2, Form("subject: %s", subject.Data()));
2817
2818         TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
2819         body += Form("SHUTTLE just detected that your preprocessor "
2820                         "failed processing run %d!!\n\n", GetCurrentRun());
2821         body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n", fCurrentDetector.Data());
2822         body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
2823         body += Form("Find the %s log for the current run on \n\n"
2824                 "\thttp://pcalishuttle01.cern.ch:8880/logs/%s_%d.log \n\n", 
2825                 fCurrentDetector.Data(), fCurrentDetector.Data(), GetCurrentRun());
2826         body += Form("The last 10 lines of %s log file are following:\n\n");
2827
2828         AliDebug(2, Form("Body begin: %s", body.Data()));
2829
2830         mailBody << body.Data();
2831         mailBody.close();
2832         mailBody.open(bodyFileName, ofstream::out | ofstream::app);
2833
2834         TString logFileName = Form("%s/%s_%d.log", GetShuttleLogDir(), fCurrentDetector.Data(), GetCurrentRun());
2835         TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
2836         if (gSystem->Exec(tailCommand.Data()))
2837         {
2838                 mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
2839         }
2840
2841         TString endBody = Form("------------------------------------------------------\n\n");
2842         endBody += Form("In case of problems please contact the SHUTTLE core team.\n\n");
2843         endBody += "Please do not answer this message directly, it is automatically generated.\n\n";
2844         endBody += "Greetings,\n\n \t\t\tthe SHUTTLE\n";
2845
2846         AliDebug(2, Form("Body end: %s", endBody.Data()));
2847
2848         mailBody << endBody.Data();
2849
2850         mailBody.close();
2851
2852         // send mail!
2853         TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
2854                                                 subject.Data(),
2855                                                 cc.Data(),
2856                                                 to.Data(),
2857                                                 bodyFileName.Data());
2858         AliDebug(2, Form("mail command: %s", mailCommand.Data()));
2859
2860         Bool_t result = gSystem->Exec(mailCommand.Data());
2861
2862         return result == 0;
2863 }
2864
2865 //______________________________________________________________________________________________
2866 const char* AliShuttle::GetRunType()
2867 {
2868         //
2869         // returns run type read from "run type" logbook
2870         //
2871
2872         if(!fLogbookEntry) {
2873                 AliError("No logbook entry!");
2874                 return 0;
2875         }
2876
2877         return fLogbookEntry->GetRunType();
2878 }
2879
2880 //______________________________________________________________________________________________
2881 void AliShuttle::SetShuttleTempDir(const char* tmpDir)
2882 {
2883         //
2884         // sets Shuttle temp directory
2885         //
2886
2887         fgkShuttleTempDir = gSystem->ExpandPathName(tmpDir);
2888 }
2889
2890 //______________________________________________________________________________________________
2891 void AliShuttle::SetShuttleLogDir(const char* logDir)
2892 {
2893         //
2894         // sets Shuttle log directory
2895         //
2896
2897         fgkShuttleLogDir = gSystem->ExpandPathName(logDir);
2898 }