]> git.uio.no Git - u/mrichter/AliRoot.git/blob - SHUTTLE/AliShuttle.cxx
In ESDtoAOD: updating instead of creating new AliAODHeader.
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttle.cxx
1 /**************************************************************************
2  * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
3  *                                                                        *
4  * Author: The ALICE Off-line Project.                                    *
5  * Contributors are mentioned in the code where appropriate.              *
6  *                                                                        *
7  * Permission to use, copy, modify and distribute this software and its   *
8  * documentation strictly for non-commercial purposes is hereby granted   *
9  * without fee, provided that the above copyright notice appears in all   *
10  * copies and that both the copyright notice and this permission notice   *
11  * appear in the supporting documentation. The authors make no claims     *
12  * about the suitability of this software for any purpose. It is          *
13  * provided "as is" without express or implied warranty.                  *
14  **************************************************************************/
15
16 /*
17 $Log$
18 Revision 1.44  2007/05/11 16:09:32  acolla
19 Reference files for ITS, MUON and PHOS are now stored in OfflineDetName/OnlineDetName/run_...
20 example: ITS/SPD/100_filename.root
21
22 Revision 1.43  2007/05/10 09:59:51  acolla
23 Various bug fixes in StoreRefFilesToGrid; Cleaning of reference storage before processing detector (CleanReferenceStorage)
24
25 Revision 1.42  2007/05/03 08:01:39  jgrosseo
26 typo in last commit :-(
27
28 Revision 1.41  2007/05/03 08:00:48  jgrosseo
29 fixing log message when pp want to skip dcs value retrieval
30
31 Revision 1.40  2007/04/27 07:06:48  jgrosseo
32 GetFileSources returns empty list in case of no files, but successful query
33 No mails sent in testmode
34
35 Revision 1.39  2007/04/17 12:43:57  acolla
36 Correction in StoreOCDB; change of text in mail to detector expert
37
38 Revision 1.38  2007/04/12 08:26:18  jgrosseo
39 updated comment
40
41 Revision 1.37  2007/04/10 16:53:14  jgrosseo
42 redirecting sub detector stdout, stderr to sub detector log file
43
44 Revision 1.35  2007/04/04 16:26:38  acolla
45 1. Re-organization of function calls in TestPreprocessor to make it more meaningful.
46 2. Added missing dependency in test preprocessors.
47 3. in AliShuttle.cxx: processing time and memory consumption info on a single line.
48
49 Revision 1.34  2007/04/04 10:33:36  jgrosseo
50 1) Storing of files to the Grid is now done _after_ your preprocessors succeeded. This is transparent, which means that you can still use the same functions (Store, StoreReferenceData) to store files to the Grid. However, the Shuttle first stores them locally and transfers them after the preprocessor finished. The return code of these two functions has changed from UInt_t to Bool_t which gives you the success of the storing.
51 In case of an error with the Grid, the Shuttle will retry the storing later, the preprocessor does not need to be run again.
52
53 2) The meaning of the return code of the preprocessor has changed. 0 is now success and any other value means failure. This value is stored in the log and you can use it to keep details about the error condition.
54
55 3) New function StoreReferenceFile to _directly_ store a file (without opening it) to the reference storage.
56
57 4) The memory usage of the preprocessor is monitored. If it exceeds 2 GB it is terminated.
58
59 5) New function AliPreprocessor::ProcessDCS(). If you do not need to have DCS data in all cases, you can skip the processing by implemting this function and returning kFALSE under certain conditions. E.g. if there is a certain run type.
60 If you always need DCS data (like before), you do not need to implement it.
61
62 6) The run type has been added to the monitoring page
63
64 Revision 1.33  2007/04/03 13:56:01  acolla
65 Grid Storage at the end of preprocessing. Added virtual method to disable DCS query according to the
66 run type.
67
68 Revision 1.32  2007/02/28 10:41:56  acolla
69 Run type field added in SHUTTLE framework. Run type is read from "run type" logbook and retrieved by
70 AliPreprocessor::GetRunType() function.
71 Added some ldap definition files.
72
73 Revision 1.30  2007/02/13 11:23:21  acolla
74 Moved getters and setters of Shuttle's main OCDB/Reference, local
75 OCDB/Reference, temp and log folders to AliShuttleInterface
76
77 Revision 1.27  2007/01/30 17:52:42  jgrosseo
78 adding monalisa monitoring
79
80 Revision 1.26  2007/01/23 19:20:03  acolla
81 Removed old ldif files, added TOF, MCH ldif files. Added some options in
82 AliShuttleConfig::Print. Added in Ali Shuttle: SetShuttleTempDir and
83 SetShuttleLogDir
84
85 Revision 1.25  2007/01/15 19:13:52  acolla
86 Moved some AliInfo to AliDebug in SendMail function
87
88 Revision 1.21  2006/12/07 08:51:26  jgrosseo
89 update (alberto):
90 table, db names in ldap configuration
91 added GRP preprocessor
92 DCS data can also be retrieved by data point
93
94 Revision 1.20  2006/11/16 16:16:48  jgrosseo
95 introducing strict run ordering flag
96 removed giving preprocessor name to preprocessor, they have to know their name themselves ;-)
97
98 Revision 1.19  2006/11/06 14:23:04  jgrosseo
99 major update (Alberto)
100 o) reading of run parameters from the logbook
101 o) online offline naming conversion
102 o) standalone DCSclient package
103
104 Revision 1.18  2006/10/20 15:22:59  jgrosseo
105 o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
106 o) Merging Collect, CollectAll, CollectNew function
107 o) Removing implementation of empty copy constructors (declaration still there!)
108
109 Revision 1.17  2006/10/05 16:20:55  jgrosseo
110 adapting to new CDB classes
111
112 Revision 1.16  2006/10/05 15:46:26  jgrosseo
113 applying to the new interface
114
115 Revision 1.15  2006/10/02 16:38:39  jgrosseo
116 update (alberto):
117 fixed memory leaks
118 storing of objects that failed to be stored to the grid before
119 interfacing of shuttle status table in daq system
120
121 Revision 1.14  2006/08/29 09:16:05  jgrosseo
122 small update
123
124 Revision 1.13  2006/08/15 10:50:00  jgrosseo
125 effc++ corrections (alberto)
126
127 Revision 1.12  2006/08/08 14:19:29  jgrosseo
128 Update to shuttle classes (Alberto)
129
130 - Possibility to set the full object's path in the Preprocessor's and
131 Shuttle's  Store functions
132 - Possibility to extend the object's run validity in the same classes
133 ("startValidity" and "validityInfinite" parameters)
134 - Implementation of the StoreReferenceData function to store reference
135 data in a dedicated CDB storage.
136
137 Revision 1.11  2006/07/21 07:37:20  jgrosseo
138 last run is stored after each run
139
140 Revision 1.10  2006/07/20 09:54:40  jgrosseo
141 introducing status management: The processing per subdetector is divided into several steps,
142 after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
143 can keep track of the number of failures and skips further processing after a certain threshold is
144 exceeded. These thresholds can be configured in LDAP.
145
146 Revision 1.9  2006/07/19 10:09:55  jgrosseo
147 new configuration, accesst to DAQ FES (Alberto)
148
149 Revision 1.8  2006/07/11 12:44:36  jgrosseo
150 adding parameters for extended validity range of data produced by preprocessor
151
152 Revision 1.7  2006/07/10 14:37:09  jgrosseo
153 small fix + todo comment
154
155 Revision 1.6  2006/07/10 13:01:41  jgrosseo
156 enhanced storing of last sucessfully processed run (alberto)
157
158 Revision 1.5  2006/07/04 14:59:57  jgrosseo
159 revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
160
161 Revision 1.4  2006/06/12 09:11:16  jgrosseo
162 coding conventions (Alberto)
163
164 Revision 1.3  2006/06/06 14:26:40  jgrosseo
165 o) removed files that were moved to STEER
166 o) shuttle updated to follow the new interface (Alberto)
167
168 Revision 1.2  2006/03/07 07:52:34  hristov
169 New version (B.Yordanov)
170
171 Revision 1.6  2005/11/19 17:19:14  byordano
172 RetrieveDATEEntries and RetrieveConditionsData added
173
174 Revision 1.5  2005/11/19 11:09:27  byordano
175 AliShuttle declaration added
176
177 Revision 1.4  2005/11/17 17:47:34  byordano
178 TList changed to TObjArray
179
180 Revision 1.3  2005/11/17 14:43:23  byordano
181 import to local CVS
182
183 Revision 1.1.1.1  2005/10/28 07:33:58  hristov
184 Initial import as subdirectory in AliRoot
185
186 Revision 1.2  2005/09/13 08:41:15  byordano
187 default startTime endTime added
188
189 Revision 1.4  2005/08/30 09:13:02  byordano
190 some docs added
191
192 Revision 1.3  2005/08/29 21:15:47  byordano
193 some docs added
194
195 */
196
197 //
198 // This class is the main manager for AliShuttle.
199 // It organizes the data retrieval from DCS and calls the
200 // interface methods of AliPreprocessor.
201 // For every detector in AliShuttleConfig (see AliShuttleConfig),
202 // data for its set of aliases is retrieved. If there is a registered
203 // AliPreprocessor for this detector then it will be used
204 // according to the schema (see AliPreprocessor).
205 // If there isn't a registered AliPreprocessor then the retrieved
206 // data is stored automatically to the underlying AliCDBStorage.
207 // The alias name is used for detSpec.
208 //
209
210 #include "AliShuttle.h"
211
212 #include "AliCDBManager.h"
213 #include "AliCDBStorage.h"
214 #include "AliCDBId.h"
215 #include "AliCDBRunRange.h"
216 #include "AliCDBPath.h"
217 #include "AliCDBEntry.h"
218 #include "AliShuttleConfig.h"
219 #include "DCSClient/AliDCSClient.h"
220 #include "AliLog.h"
221 #include "AliPreprocessor.h"
222 #include "AliShuttleStatus.h"
223 #include "AliShuttleLogbookEntry.h"
224
225 #include <TSystem.h>
226 #include <TObject.h>
227 #include <TString.h>
228 #include <TTimeStamp.h>
229 #include <TObjString.h>
230 #include <TSQLServer.h>
231 #include <TSQLResult.h>
232 #include <TSQLRow.h>
233 #include <TMutex.h>
234 #include <TSystemDirectory.h>
235 #include <TSystemFile.h>
236 #include <TFileMerger.h>
237 #include <TGrid.h>
238 #include <TGridResult.h>
239
240 #include <TMonaLisaWriter.h>
241
242 #include <fstream>
243
244 #include <sys/types.h>
245 #include <sys/wait.h>
246
247 ClassImp(AliShuttle)
248
249 //______________________________________________________________________________________________
250 AliShuttle::AliShuttle(const AliShuttleConfig* config,
251                 UInt_t timeout, Int_t retries):
252 fConfig(config),
253 fTimeout(timeout), fRetries(retries),
254 fPreprocessorMap(),
255 fLogbookEntry(0),
256 fCurrentDetector(),
257 fStatusEntry(0),
258 fMonitoringMutex(0),
259 fLastActionTime(0),
260 fLastAction(),
261 fMonaLisa(0),
262 fTestMode(kNone),
263 fReadTestMode(kFALSE),
264 fOutputRedirected(kFALSE)
265 {
266         //
267         // config: AliShuttleConfig used
268         // timeout: timeout used for AliDCSClient connection
269         // retries: the number of retries in case of connection error.
270         //
271
272         if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
273         for(int iSys=0;iSys<4;iSys++) {
274                 fServer[iSys]=0;
275                 if (iSys < 3)
276                         fFXSlist[iSys].SetOwner(kTRUE);
277         }
278         fPreprocessorMap.SetOwner(kTRUE);
279
280         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
281                 fFirstUnprocessed[iDet] = kFALSE;
282
283         fMonitoringMutex = new TMutex();
284 }
285
286 //______________________________________________________________________________________________
287 AliShuttle::~AliShuttle()
288 {
289         //
290         // destructor
291         //
292
293         fPreprocessorMap.DeleteAll();
294         for(int iSys=0;iSys<4;iSys++)
295                 if(fServer[iSys]) {
296                         fServer[iSys]->Close();
297                         delete fServer[iSys];
298                         fServer[iSys] = 0;
299                 }
300
301         if (fStatusEntry){
302                 delete fStatusEntry;
303                 fStatusEntry = 0;
304         }
305         
306         if (fMonitoringMutex) 
307         {
308                 delete fMonitoringMutex;
309                 fMonitoringMutex = 0;
310         }
311 }
312
313 //______________________________________________________________________________________________
314 void AliShuttle::RegisterPreprocessor(AliPreprocessor* preprocessor)
315 {
316         //
317         // Registers new AliPreprocessor.
318         // It uses GetName() for indentificator of the pre processor.
319         // The pre processor is registered it there isn't any other
320         // with the same identificator (GetName()).
321         //
322
323         const char* detName = preprocessor->GetName();
324         if(GetDetPos(detName) < 0)
325                 AliFatal(Form("********** !!!!! Invalid detector name: %s !!!!! **********", detName));
326
327         if (fPreprocessorMap.GetValue(detName)) {
328                 AliWarning(Form("AliPreprocessor %s is already registered!", detName));
329                 return;
330         }
331
332         fPreprocessorMap.Add(new TObjString(detName), preprocessor);
333 }
334 //______________________________________________________________________________________________
335 Bool_t AliShuttle::Store(const AliCDBPath& path, TObject* object,
336                 AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite)
337 {
338         // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for
339         // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored
340         // using this function. Use StoreReferenceData instead!
341         // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
342         // finishes the data are transferred to the main storage (Grid).
343
344         return StoreLocally(fgkLocalCDB, path, object, metaData, validityStart, validityInfinite);
345 }
346
347 //______________________________________________________________________________________________
348 Bool_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData)
349 {
350         // Stores a CDB object in the storage for reference data. This objects will not be available during
351         // offline reconstrunction. Use this function for reference data only!
352         // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
353         // finishes the data are transferred to the main storage (Grid).
354
355         return StoreLocally(fgkLocalRefStorage, path, object, metaData);
356 }
357
358 //______________________________________________________________________________________________
359 Bool_t AliShuttle::StoreLocally(const TString& localUri,
360                         const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
361                         Int_t validityStart, Bool_t validityInfinite)
362 {
363         // Store object temporarily in local storage. Parameters are passed by Store and StoreReferenceData functions.
364         // when the preprocessor finishes the data are transferred to the main storage (Grid).
365         // The parameters are:
366         //   1) Uri of the backup storage (Local)
367         //   2) the object's path.
368         //   3) the object to be stored
369         //   4) the metaData to be associated with the object
370         //   5) the validity start run number w.r.t. the current run,
371         //      if the data is valid only for this run leave the default 0
372         //   6) specifies if the calibration data is valid for infinity (this means until updated),
373         //      typical for calibration runs, the default is kFALSE
374         //
375         // returns 0 if fail, 1 otherwise
376
377         if (fTestMode & kErrorStorage)
378         {
379                 Log(fCurrentDetector, "StoreLocally - In TESTMODE - Simulating error while storing locally");
380                 return kFALSE;
381         }
382         
383         const char* cdbType = (localUri == fgkLocalCDB) ? "CDB" : "Reference";
384
385         Int_t firstRun = GetCurrentRun() - validityStart;
386         if(firstRun < 0) {
387                 AliWarning("First valid run happens to be less than 0! Setting it to 0.");
388                 firstRun=0;
389         }
390
391         Int_t lastRun = -1;
392         if(validityInfinite) {
393                 lastRun = AliCDBRunRange::Infinity();
394         } else {
395                 lastRun = GetCurrentRun();
396         }
397
398         // Version is set to current run, it will be used later to transfer data to Grid
399         AliCDBId id(path, firstRun, lastRun, GetCurrentRun(), -1);
400
401         if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){
402                 TObjString runUsed = Form("%d", GetCurrentRun());
403                 metaData->SetProperty("RunUsed(TObjString)", runUsed.Clone());
404         }
405
406         Bool_t result = kFALSE;
407
408         if (!(AliCDBManager::Instance()->GetStorage(localUri))) {
409                 Log("SHUTTLE", Form("StoreLocally - Cannot activate local %s storage", cdbType));
410         } else {
411                 result = AliCDBManager::Instance()->GetStorage(localUri)
412                                         ->Put(object, id, metaData);
413         }
414
415         if(!result) {
416
417                 Log(fCurrentDetector, Form("StoreLocally - Can't store object <%s>!", id.ToString().Data()));
418         }
419
420         return result;
421 }
422
423 //______________________________________________________________________________________________
424 Bool_t AliShuttle::StoreOCDB()
425 {
426         //
427         // Called when preprocessor ends successfully or when previous storage attempt failed (kStoreError status)
428         // Calls underlying StoreOCDB(const char*) function twice, for OCDB and Reference storage.
429         // Then calls StoreRefFilesToGrid to store reference files. 
430         //
431         
432         if (fTestMode & kErrorGrid)
433         {
434                 Log("SHUTTLE", "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
435                 Log(fCurrentDetector, "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
436                 return kFALSE;
437         }
438         
439         Log("SHUTTLE","Storing OCDB data ...");
440         Bool_t resultCDB = StoreOCDB(fgkMainCDB);
441
442         Log("SHUTTLE","Storing reference data ...");
443         Bool_t resultRef = StoreOCDB(fgkMainRefStorage);
444         
445         Log("SHUTTLE","Storing reference files ...");
446         Bool_t resultRefFiles = StoreRefFilesToGrid();
447         
448         return resultCDB && resultRef && resultRefFiles;
449 }
450
451 //______________________________________________________________________________________________
452 Bool_t AliShuttle::StoreOCDB(const TString& gridURI)
453 {
454         //
455         // Called by StoreOCDB(), performs actual storage to the main OCDB and reference storages (Grid)
456         //
457
458         TObjArray* gridIds=0;
459
460         Bool_t result = kTRUE;
461
462         const char* type = 0;
463         TString localURI;
464         if(gridURI == fgkMainCDB) {
465                 type = "OCDB";
466                 localURI = fgkLocalCDB;
467         } else if(gridURI == fgkMainRefStorage) {
468                 type = "reference";
469                 localURI = fgkLocalRefStorage;
470         } else {
471                 AliError(Form("Invalid storage URI: %s", gridURI.Data()));
472                 return kFALSE;
473         }
474
475         AliCDBManager* man = AliCDBManager::Instance();
476
477         AliCDBStorage *gridSto = man->GetStorage(gridURI);
478         if(!gridSto) {
479                 Log("SHUTTLE",
480                         Form("StoreOCDB - cannot activate main %s storage", type));
481                 return kFALSE;
482         }
483
484         gridIds = gridSto->GetQueryCDBList();
485
486         // get objects previously stored in local CDB
487         AliCDBStorage *localSto = man->GetStorage(localURI);
488         if(!localSto) {
489                 Log("SHUTTLE",
490                         Form("StoreOCDB - cannot activate local %s storage", type));
491                 return kFALSE;
492         }
493         AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*");
494         // Local objects were stored with current run as Grid version!
495         TList* localEntries = localSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun());
496         localEntries->SetOwner(1);
497
498         // loop on local stored objects
499         TIter localIter(localEntries);
500         AliCDBEntry *aLocEntry = 0;
501         while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){
502                 aLocEntry->SetOwner(1);
503                 AliCDBId aLocId = aLocEntry->GetId();
504                 aLocEntry->SetVersion(-1);
505                 aLocEntry->SetSubVersion(-1);
506
507                 // If local object is valid up to infinity we store it only if it is
508                 // the first unprocessed run!
509                 if (aLocId.GetLastRun() == AliCDBRunRange::Infinity() &&
510                         !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
511                 {
512                         Log("SHUTTLE", Form("StoreOCDB - %s: object %s has validity infinite but "
513                                                 "there are previous unprocessed runs!",
514                                                 fCurrentDetector.Data(), aLocId.GetPath().Data()));
515                         continue;
516                 }
517
518                 // loop on Grid valid Id's
519                 Bool_t store = kTRUE;
520                 TIter gridIter(gridIds);
521                 AliCDBId* aGridId = 0;
522                 while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
523                         if(aGridId->GetPath() != aLocId.GetPath()) continue;
524                         // skip all objects valid up to infinity
525                         if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
526                         // if we get here, it means there's already some more recent object stored on Grid!
527                         store = kFALSE;
528                         break;
529                 }
530
531                 // If we get here, the file can be stored!
532                 Bool_t storeOk = gridSto->Put(aLocEntry);
533                 if(!store || storeOk){
534
535                         if (!store)
536                         {
537                                 Log(fCurrentDetector.Data(),
538                                         Form("StoreOCDB - A more recent object already exists in %s storage: <%s>",
539                                                 type, aGridId->ToString().Data()));
540                         } else {
541                                 Log("SHUTTLE",
542                                         Form("StoreOCDB - Object <%s> successfully put into %s storage",
543                                                 aLocId.ToString().Data(), type));
544                                 Log(fCurrentDetector.Data(),
545                                         Form("StoreOCDB - Object <%s> successfully put into %s storage",
546                                                 aLocId.ToString().Data(), type));
547                         }
548
549                         // removing local filename...
550                         TString filename;
551                         localSto->IdToFilename(aLocId, filename);
552                         AliInfo(Form("Removing local file %s", filename.Data()));
553                         RemoveFile(filename.Data());
554                         continue;
555                 } else  {
556                         Log("SHUTTLE",
557                                 Form("StoreOCDB - Grid %s storage of object <%s> failed",
558                                         type, aLocId.ToString().Data()));
559                         Log(fCurrentDetector.Data(),
560                                 Form("StoreOCDB - Grid %s storage of object <%s> failed",
561                                         type, aLocId.ToString().Data()));
562                         result = kFALSE;
563                 }
564         }
565         localEntries->Clear();
566
567         return result;
568 }
569
570 //______________________________________________________________________________________________
571 Bool_t AliShuttle::CleanReferenceStorage(const char* detector)
572 {
573         // clears the directory used to store reference files of a given subdetector
574   
575         AliCDBManager* man = AliCDBManager::Instance();
576         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
577         TString localBaseFolder = sto->GetBaseFolder();
578
579         TString targetDir = GetRefFilePrefix(localBaseFolder.Data(), detector);
580         
581         Log("SHUTTLE", Form("Cleaning %s", targetDir.Data()));
582
583         TString begin;
584         begin.Form("%d_", GetCurrentRun());
585         
586         TSystemDirectory* baseDir = new TSystemDirectory("/", targetDir);
587         if (!baseDir)
588                 return kTRUE;
589                 
590         TList* dirList = baseDir->GetListOfFiles();
591         delete baseDir;
592         
593         if (!dirList) return kTRUE;
594                         
595         if (dirList->GetEntries() < 3) 
596         {
597                 delete dirList;
598                 return kTRUE;
599         }
600                                 
601         Int_t nDirs = 0, nDel = 0;
602         TIter dirIter(dirList);
603         TSystemFile* entry = 0;
604
605         Bool_t success = kTRUE;
606         
607         while ((entry = dynamic_cast<TSystemFile*> (dirIter.Next())))
608         {                                       
609                 if (entry->IsDirectory())
610                         continue;
611                 
612                 TString fileName(entry->GetName());
613                 if (!fileName.BeginsWith(begin))
614                         continue;
615                         
616                 nDirs++;
617                                                 
618                 // delete file
619                 Int_t result = gSystem->Unlink(fileName.Data());
620                 
621                 if (result)
622                 {
623                         Log("SHUTTLE", Form("Could not delete file %s!", fileName.Data()));
624                         success = kFALSE;
625                 } else {
626                         nDel++;
627                 }
628         }
629
630         if(nDirs > 0)
631                 Log("SHUTTLE", Form("CleanReferenceStorage - %d (over %d) reference files in folder %s were deleted.", 
632                         nDel, nDirs, targetDir.Data()));
633
634                 
635         delete dirList;
636         return success;
637
638
639
640
641
642
643   Int_t result = gSystem->GetPathInfo(targetDir, 0, (Long64_t*) 0, 0, 0);
644   if (result == 0)
645   {
646     // delete directory
647     result = gSystem->Exec(Form("rm -r %s", targetDir.Data()));
648     if (result != 0)
649     {  
650       Log("SHUTTLE", Form("StoreReferenceFile - Could not clear directory %s", targetDir.Data()));
651       return kFALSE;
652     }
653   }
654
655   result = gSystem->mkdir(targetDir, kTRUE);
656   if (result != 0)
657   {
658     Log("SHUTTLE", Form("StoreReferenceFile - Error creating base directory %s", targetDir.Data()));
659     return kFALSE;
660   }
661         
662   return kTRUE;
663 }
664
665 //______________________________________________________________________________________________
666 Bool_t AliShuttle::StoreReferenceFile(const char* detector, const char* localFile, const char* gridFileName)
667 {
668         //
669         // Stores reference file directly (without opening it). This function stores the file locally.
670         //
671         // The file is stored under the following location: 
672         // <base folder of local reference storage>/<DET>/<RUN#>_<gridFileName>
673         // where <gridFileName> is the second parameter given to the function
674         // 
675         
676         if (fTestMode & kErrorStorage)
677         {
678                 Log(fCurrentDetector, "StoreReferenceFile - In TESTMODE - Simulating error while storing locally");
679                 return kFALSE;
680         }
681         
682         AliCDBManager* man = AliCDBManager::Instance();
683         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
684         
685         TString localBaseFolder = sto->GetBaseFolder();
686         
687         TString targetDir = GetRefFilePrefix(localBaseFolder.Data(), detector); 
688         
689         //try to open folder, if does not exist
690         void* dir = gSystem->OpenDirectory(targetDir.Data());
691         if (dir == NULL) {
692                 if (gSystem->mkdir(targetDir.Data(), kTRUE)) {
693                         Log("SHUTTLE", Form("Can't open directory <%s>", targetDir.Data()));
694                         return kFALSE;
695                 }
696
697         } else {
698                 gSystem->FreeDirectory(dir);
699         }
700
701         TString target;
702         target.Form("%s/%d_%s", targetDir.Data(), GetCurrentRun(), gridFileName);
703         
704         Int_t result = gSystem->GetPathInfo(localFile, 0, (Long64_t*) 0, 0, 0);
705         if (result)
706         {
707                 Log("SHUTTLE", Form("StoreReferenceFile - %s does not exist", localFile));
708                 return kFALSE;
709         }
710
711         result = gSystem->CopyFile(localFile, target);
712
713         if (result == 0)
714         {
715                 Log("SHUTTLE", Form("StoreReferenceFile - File %s stored locally to %s", localFile, target.Data()));
716                 return kTRUE;
717         }
718         else
719         {
720                 Log("SHUTTLE", Form("StoreReferenceFile - Could not store file %s to %s!. Error code = %d", 
721                                 localFile, target.Data(), result));
722                 return kFALSE;
723         }       
724 }
725
726 //______________________________________________________________________________________________
727 Bool_t AliShuttle::StoreRefFilesToGrid()
728 {
729         //
730         // Transfers the reference file to the Grid.
731         //
732         // The files are stored under the following location: 
733         // <base folder of reference storage>/<DET>/<RUN#>_<gridFileName>
734         //
735         
736         AliCDBManager* man = AliCDBManager::Instance();
737         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
738         if (!sto)
739                 return kFALSE;
740         TString localBaseFolder = sto->GetBaseFolder();
741                 
742         TString dir = GetRefFilePrefix(localBaseFolder.Data(), fCurrentDetector.Data());
743                 
744         AliCDBStorage* gridSto = man->GetStorage(fgkMainRefStorage);
745         if (!gridSto)
746                 return kFALSE;
747         
748         TString gridBaseFolder = gridSto->GetBaseFolder();
749
750         TString alienDir = GetRefFilePrefix(gridBaseFolder.Data(), fCurrentDetector.Data());
751         
752         TString begin;
753         begin.Form("%d_", GetCurrentRun());
754         
755         TSystemDirectory* baseDir = new TSystemDirectory("/", dir);
756         if (!baseDir)
757                 return kTRUE;
758                 
759         TList* dirList = baseDir->GetListOfFiles();
760         delete baseDir;
761         
762         if (!dirList) return kTRUE;
763                 
764         if (dirList->GetEntries() < 3) 
765         {
766                 delete dirList;
767                 return kTRUE;
768         }
769                         
770         if (!gGrid)
771         { 
772                 Log("SHUTTLE", "Connection to Grid failed: Cannot continue!");
773                 delete dirList;
774                 return kFALSE;
775         }
776         
777         Int_t nDirs = 0, nTransfer = 0;
778         TIter dirIter(dirList);
779         TSystemFile* entry = 0;
780
781         Bool_t success = kTRUE;
782         Bool_t first = kTRUE;
783         
784         while ((entry = dynamic_cast<TSystemFile*> (dirIter.Next())))
785         {                       
786                 if (entry->IsDirectory())
787                         continue;
788                         
789                 TString fileName(entry->GetName());
790                 if (!fileName.BeginsWith(begin))
791                         continue;
792                         
793                 nDirs++;
794                         
795                 if (first)
796                 {
797                         first = kFALSE;
798                         // check that DET folder exists, otherwise create it
799                         TGridResult* result = gGrid->Ls(alienDir.Data(), "a");
800                         
801                         if (!result)
802                         {
803                                 delete dirList;
804                                 return kFALSE;
805                         }
806                         
807                         if (!result->GetFileName(1)) // TODO: It looks like element 0 is always 0!!
808                         {
809                                 if (!gGrid->Mkdir(alienDir.Data(),"",0))
810                                 {
811                                         Log("SHUTTLE", Form("StoreRefFilesToGrid - Cannot create directory %s",
812                                                         alienDir.Data()));
813                                         delete dirList;
814                                         return kFALSE;
815                                 } else {
816                                         Log("SHUTTLE",Form("Folder %s created", alienDir.Data()));
817                                 }
818                                 
819                         } else {
820                                         Log("SHUTTLE",Form("Folder %s found", alienDir.Data()));
821                         }
822                 }
823                         
824                 TString fullLocalPath;
825                 fullLocalPath.Form("%s/%s", dir.Data(), fileName.Data());
826                 
827                 TString fullGridPath;
828                 fullGridPath.Form("alien://%s/%s", alienDir.Data(), fileName.Data());
829
830                 TFileMerger fileMerger;
831                 Bool_t result = fileMerger.Cp(fullLocalPath, fullGridPath);
832                 
833                 if (result)
834                 {
835                         Log("SHUTTLE", Form("StoreRefFilesToGrid - Copying local file %s to %s succeeded!", fullLocalPath.Data(), fullGridPath.Data()));
836                         RemoveFile(fullLocalPath);
837                         nTransfer++;
838                 }
839                 else
840                 {
841                         Log("SHUTTLE", Form("StoreRefFilesToGrid - Copying local file %s to %s FAILED!", fullLocalPath.Data(), fullGridPath.Data()));
842                         success = kFALSE;
843                 }
844         }
845
846         Log("SHUTTLE", Form("StoreRefFilesToGrid - %d (over %d) reference files in folder %s copied to Grid.", nTransfer, nDirs, dir.Data()));
847
848                 
849         delete dirList;
850         return success;
851 }
852
853 //______________________________________________________________________________________________
854 const char* AliShuttle::GetRefFilePrefix(const char* base, const char* detector)
855 {
856         //
857         // Get folder name of reference files 
858         //
859
860         TString offDetStr(GetOfflineDetName(detector));
861         TString dir;
862         if (offDetStr == "ITS" || offDetStr == "MUON" || offDetStr == "PHOS")
863         {
864                 dir.Form("%s/%s/%s", base, offDetStr.Data(), detector);
865         } else {
866                 dir.Form("%s/%s", base, offDetStr.Data());
867         }
868         
869         return dir.Data();
870         
871
872 }
873 //______________________________________________________________________________________________
874 void AliShuttle::CleanLocalStorage(const TString& uri)
875 {
876         //
877         // Called in case the preprocessor is declared failed. Remove remaining objects from the local storages.
878         //
879
880         const char* type = 0;
881         if(uri == fgkLocalCDB) {
882                 type = "OCDB";
883         } else if(uri == fgkLocalRefStorage) {
884                 type = "Reference";
885         } else {
886                 AliError(Form("Invalid storage URI: %s", uri.Data()));
887                 return;
888         }
889
890         AliCDBManager* man = AliCDBManager::Instance();
891
892         // open local storage
893         AliCDBStorage *localSto = man->GetStorage(uri);
894         if(!localSto) {
895                 Log("SHUTTLE",
896                         Form("CleanLocalStorage - cannot activate local %s storage", type));
897                 return;
898         }
899
900         TString filename(Form("%s/%s/*/Run*_v%d_s*.root",
901                 localSto->GetBaseFolder().Data(), GetOfflineDetName(fCurrentDetector.Data()), GetCurrentRun()));
902
903         AliInfo(Form("filename = %s", filename.Data()));
904
905         AliInfo(Form("Removing remaining local files from run %d and detector %s ...",
906                 GetCurrentRun(), fCurrentDetector.Data()));
907
908         RemoveFile(filename.Data());
909
910 }
911
912 //______________________________________________________________________________________________
913 void AliShuttle::RemoveFile(const char* filename)
914 {
915         //
916         // removes local file
917         //
918
919         TString command(Form("rm -f %s", filename));
920
921         Int_t result = gSystem->Exec(command.Data());
922         if(result != 0)
923         {
924                 Log("SHUTTLE", Form("RemoveFile - %s: Cannot remove file %s!",
925                         fCurrentDetector.Data(), filename));
926         }
927 }
928
929 //______________________________________________________________________________________________
930 AliShuttleStatus* AliShuttle::ReadShuttleStatus()
931 {
932         //
933         // Reads the AliShuttleStatus from the CDB
934         //
935
936         if (fStatusEntry){
937                 delete fStatusEntry;
938                 fStatusEntry = 0;
939         }
940
941         fStatusEntry = AliCDBManager::Instance()->GetStorage(GetLocalCDB())
942                 ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
943
944         if (!fStatusEntry) return 0;
945         fStatusEntry->SetOwner(1);
946
947         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
948         if (!status) {
949                 AliError("Invalid object stored to CDB!");
950                 return 0;
951         }
952
953         return status;
954 }
955
956 //______________________________________________________________________________________________
957 Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status)
958 {
959         //
960         // writes the status for one subdetector
961         //
962
963         if (fStatusEntry){
964                 delete fStatusEntry;
965                 fStatusEntry = 0;
966         }
967
968         Int_t run = GetCurrentRun();
969
970         AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
971
972         fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
973         fStatusEntry->SetOwner(1);
974
975         UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
976
977         if (!result) {
978                 Log("SHUTTLE", Form("WriteShuttleStatus - Failed for %s, run %d",
979                                                 fCurrentDetector.Data(), run));
980                 return kFALSE;
981         }
982         
983         SendMLInfo();
984
985         return kTRUE;
986 }
987
988 //______________________________________________________________________________________________
989 void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount)
990 {
991         //
992         // changes the AliShuttleStatus for the given detector and run to the given status
993         //
994
995         if (!fStatusEntry){
996                 AliError("UNEXPECTED: fStatusEntry empty");
997                 return;
998         }
999
1000         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1001
1002         if (!status){
1003                 Log("SHUTTLE", "UNEXPECTED: status could not be read from current CDB entry");
1004                 return;
1005         }
1006
1007         TString actionStr = Form("UpdateShuttleStatus - %s: Changing state from %s to %s",
1008                                 fCurrentDetector.Data(),
1009                                 status->GetStatusName(),
1010                                 status->GetStatusName(newStatus));
1011         Log("SHUTTLE", actionStr);
1012         SetLastAction(actionStr);
1013
1014         status->SetStatus(newStatus);
1015         if (increaseCount) status->IncreaseCount();
1016
1017         AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
1018
1019         SendMLInfo();
1020 }
1021
1022 //______________________________________________________________________________________________
1023 void AliShuttle::SendMLInfo()
1024 {
1025         //
1026         // sends ML information about the current status of the current detector being processed
1027         //
1028         
1029         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1030         
1031         if (!status){
1032                 Log("SHUTTLE", "SendMLInfo - UNEXPECTED: status could not be read from current CDB entry");
1033                 return;
1034         }
1035         
1036         TMonaLisaText  mlStatus(Form("%s_status", fCurrentDetector.Data()), status->GetStatusName());
1037         TMonaLisaValue mlRetryCount(Form("%s_count", fCurrentDetector.Data()), status->GetCount());
1038
1039         TList mlList;
1040         mlList.Add(&mlStatus);
1041         mlList.Add(&mlRetryCount);
1042
1043         fMonaLisa->SendParameters(&mlList);
1044 }
1045
1046 //______________________________________________________________________________________________
1047 Bool_t AliShuttle::ContinueProcessing()
1048 {
1049         // this function reads the AliShuttleStatus information from CDB and
1050         // checks if the processing should be continued
1051         // if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
1052
1053         if (!fConfig->HostProcessDetector(fCurrentDetector)) return kFALSE;
1054
1055         AliPreprocessor* aPreprocessor =
1056                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
1057         if (!aPreprocessor)
1058         {
1059                 AliInfo(Form("%s: no preprocessor registered", fCurrentDetector.Data()));
1060                 return kFALSE;
1061         }
1062
1063         AliShuttleLogbookEntry::Status entryStatus =
1064                 fLogbookEntry->GetDetectorStatus(fCurrentDetector);
1065
1066         if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
1067                 AliInfo(Form("ContinueProcessing - %s is %s",
1068                                 fCurrentDetector.Data(),
1069                                 fLogbookEntry->GetDetectorStatusName(entryStatus)));
1070                 return kFALSE;
1071         }
1072
1073         // if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state
1074
1075         // check if current run is first unprocessed run for current detector
1076         if (fConfig->StrictRunOrder(fCurrentDetector) &&
1077                 !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
1078         {
1079                 if (fTestMode == kNone)
1080                 {
1081                         Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering but this is not the first unprocessed run!"));
1082                         return kFALSE;
1083                 }
1084                 else
1085                 {
1086                         Log("SHUTTLE", Form("ContinueProcessing - In TESTMODE - Although %s requires strict run ordering and this is not the first unprocessed run, the SHUTTLE continues"));
1087                 }
1088         }
1089
1090         AliShuttleStatus* status = ReadShuttleStatus();
1091         if (!status) {
1092                 // first time
1093                 Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
1094                                 fCurrentDetector.Data()));
1095                 status = new AliShuttleStatus(AliShuttleStatus::kStarted);
1096                 return WriteShuttleStatus(status);
1097         }
1098
1099         // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
1100         // If it happens it may mean Logbook updating failed... let's do it now!
1101         if (status->GetStatus() == AliShuttleStatus::kDone ||
1102             status->GetStatus() == AliShuttleStatus::kFailed){
1103                 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
1104                                         fCurrentDetector.Data(),
1105                                         status->GetStatusName(status->GetStatus())));
1106                 UpdateShuttleLogbook(fCurrentDetector.Data(),
1107                                         status->GetStatusName(status->GetStatus()));
1108                 return kFALSE;
1109         }
1110
1111         if (status->GetStatus() == AliShuttleStatus::kStoreError) {
1112                 Log("SHUTTLE",
1113                         Form("ContinueProcessing - %s: Grid storage of one or more objects failed. Trying again now",
1114                                 fCurrentDetector.Data()));
1115                 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
1116                 if (StoreOCDB()){
1117                         Log("SHUTTLE", Form("ContinueProcessing - %s: all objects successfully stored into main storage",
1118                                 fCurrentDetector.Data()));
1119                         UpdateShuttleStatus(AliShuttleStatus::kDone);
1120                         UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE");
1121                 } else {
1122                         Log("SHUTTLE",
1123                                 Form("ContinueProcessing - %s: Grid storage failed again",
1124                                         fCurrentDetector.Data()));
1125                         UpdateShuttleStatus(AliShuttleStatus::kStoreError);
1126                 }
1127                 return kFALSE;
1128         }
1129
1130         // if we get here, there is a restart
1131         Bool_t cont = kFALSE;
1132
1133         // abort conditions
1134         if (status->GetCount() >= fConfig->GetMaxRetries()) {
1135                 Log("SHUTTLE", Form("ContinueProcessing - %s failed %d times in status %s - "
1136                                 "Updating Shuttle Logbook", fCurrentDetector.Data(),
1137                                 status->GetCount(), status->GetStatusName()));
1138                 UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
1139                 UpdateShuttleStatus(AliShuttleStatus::kFailed);
1140
1141                 // there may still be objects in local OCDB and reference storage
1142                 // and FXS databases may be not updated: do it now!
1143                 
1144                 // TODO Currently disabled, we want to keep files in case of failure!
1145                 // CleanLocalStorage(fgkLocalCDB);
1146                 // CleanLocalStorage(fgkLocalRefStorage);
1147                 // UpdateTableFailCase();
1148                 
1149                 // Send mail to detector expert!
1150                 AliInfo(Form("Sending mail to %s expert...", fCurrentDetector.Data()));
1151                 if (!SendMail())
1152                         Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert",
1153                                         fCurrentDetector.Data()));
1154
1155         } else {
1156                 Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. "
1157                                 "Aborted before with %s. Retry number %d.", fCurrentDetector.Data(),
1158                                 status->GetStatusName(), status->GetCount()));
1159                 Bool_t increaseCount = kTRUE;
1160                 if (status->GetStatus() == AliShuttleStatus::kDCSError || status->GetStatus() == AliShuttleStatus::kDCSStarted)
1161                         increaseCount = kFALSE;
1162                 UpdateShuttleStatus(AliShuttleStatus::kStarted, increaseCount);
1163                 cont = kTRUE;
1164         }
1165
1166         return cont;
1167 }
1168
1169 //______________________________________________________________________________________________
1170 Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
1171 {
1172         //
1173         // Makes data retrieval for all detectors in the configuration.
1174         // entry: Shuttle logbook entry, contains run paramenters and status of detectors
1175         // (Unprocessed, Inactive, Failed or Done).
1176         // Returns kFALSE in case of error occured and kTRUE otherwise
1177         //
1178
1179         if (!entry) return kFALSE;
1180
1181         fLogbookEntry = entry;
1182
1183         AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^* \n",
1184                                         GetCurrentRun()));
1185
1186         // create ML instance that monitors this run
1187         fMonaLisa = new TMonaLisaWriter(Form("%d", GetCurrentRun()), "SHUTTLE", "aliendb1.cern.ch");
1188         // disable monitoring of other parameters that come e.g. from TFile
1189         gMonitoringWriter = 0;
1190
1191         // Send the information to ML
1192         TMonaLisaText  mlStatus("SHUTTLE_status", "Processing");
1193         TMonaLisaText  mlRunType("SHUTTLE_runtype", Form("%s (%s)", entry->GetRunType(), entry->GetRunParameter("log")));
1194
1195         TList mlList;
1196         mlList.Add(&mlStatus);
1197         mlList.Add(&mlRunType);
1198
1199         fMonaLisa->SendParameters(&mlList);
1200
1201         if (fLogbookEntry->IsDone())
1202         {
1203                 Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook");
1204                 UpdateShuttleLogbook("shuttle_done");
1205                 fLogbookEntry = 0;
1206                 return kTRUE;
1207         }
1208
1209         // read test mode if flag is set
1210         if (fReadTestMode)
1211         {
1212                 fTestMode = kNone;
1213                 TString logEntry(entry->GetRunParameter("log"));
1214                 //printf("log entry = %s\n", logEntry.Data());
1215                 TString searchStr("Testmode: ");
1216                 Int_t pos = logEntry.Index(searchStr.Data());
1217                 //printf("%d\n", pos);
1218                 if (pos >= 0)
1219                 {
1220                         TSubString subStr = logEntry(pos + searchStr.Length(), logEntry.Length());
1221                         //printf("%s\n", subStr.String().Data());
1222                         TString newStr(subStr.Data());
1223                         TObjArray* token = newStr.Tokenize(' ');
1224                         if (token)
1225                         {
1226                                 //token->Print();
1227                                 TObjString* tmpStr = dynamic_cast<TObjString*> (token->First());
1228                                 if (tmpStr)
1229                                 {
1230                                         Int_t testMode = tmpStr->String().Atoi();
1231                                         if (testMode > 0)
1232                                         {
1233                                                 Log("SHUTTLE", Form("Enabling test mode %d", testMode));
1234                                                 SetTestMode((TestMode) testMode);
1235                                         }
1236                                 }
1237                                 delete token;          
1238                         }
1239                 }
1240         }
1241         
1242         Log("SHUTTLE", Form("The test mode flag is %d", (Int_t) fTestMode));
1243         
1244         fLogbookEntry->Print("all");
1245
1246         // Initialization
1247         Bool_t hasError = kFALSE;
1248
1249         AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
1250         if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
1251         AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
1252         if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
1253
1254         // Loop on detectors in the configuration
1255         TIter iter(fConfig->GetDetectors());
1256         TObjString* aDetector = 0;
1257
1258         while ((aDetector = (TObjString*) iter.Next()))
1259         {
1260                 fCurrentDetector = aDetector->String();
1261
1262                 if (ContinueProcessing() == kFALSE) continue;
1263
1264                 AliInfo(Form("\n\n \t\t\t****** run %d - %s: START  ******",
1265                                                 GetCurrentRun(), aDetector->GetName()));
1266
1267                 for(Int_t iSys=0;iSys<3;iSys++) fFXSCalled[iSys]=kFALSE;
1268
1269                 Log(fCurrentDetector.Data(), "Starting processing");
1270
1271                 Int_t pid = fork();
1272
1273                 if (pid < 0)
1274                 {
1275                         Log("SHUTTLE", "ERROR: Forking failed");
1276                 }
1277                 else if (pid > 0)
1278                 {
1279                         // parent
1280                         AliInfo(Form("In parent process of %d - %s: Starting monitoring",
1281                                                         GetCurrentRun(), aDetector->GetName()));
1282
1283                         Long_t begin = time(0);
1284
1285                         int status; // to be used with waitpid, on purpose an int (not Int_t)!
1286                         while (waitpid(pid, &status, WNOHANG) == 0)
1287                         {
1288                                 Long_t expiredTime = time(0) - begin;
1289
1290                                 if (expiredTime > fConfig->GetPPTimeOut())
1291                                 {
1292                                         TString tmp;
1293                                         tmp.Form("Process of %s time out. Run time: %d seconds. Killing...",
1294                                                                 fCurrentDetector.Data(), expiredTime);
1295                                         Log("SHUTTLE", tmp);
1296                                         Log(fCurrentDetector, tmp);
1297
1298                                         kill(pid, 9);
1299
1300                                         UpdateShuttleStatus(AliShuttleStatus::kPPTimeOut);
1301                                         hasError = kTRUE;
1302
1303                                         gSystem->Sleep(1000);
1304                                 }
1305                                 else
1306                                 {
1307                                         gSystem->Sleep(1000);
1308                                         
1309                                         TString checkStr;
1310                                         checkStr.Form("ps -o vsize --pid %d | tail -n 1", pid);
1311                                         FILE* pipe = gSystem->OpenPipe(checkStr, "r");
1312                                         if (!pipe)
1313                                         {
1314                                                 Log("SHUTTLE", Form("Error: Could not open pipe to %s", checkStr.Data()));
1315                                                 continue;
1316                                         }
1317                                                 
1318                                         char buffer[100];
1319                                         if (!fgets(buffer, 100, pipe))
1320                                         {
1321                                                 Log("SHUTTLE", "Error: ps did not return anything");
1322                                                 gSystem->ClosePipe(pipe);
1323                                                 continue;
1324                                         }
1325                                         gSystem->ClosePipe(pipe);
1326                                         
1327                                         //Log("SHUTTLE", Form("ps returned %s", buffer));
1328                                         
1329                                         Int_t mem = 0;
1330                                         if ((sscanf(buffer, "%d\n", &mem) != 1) || !mem)
1331                                         {
1332                                                 Log("SHUTTLE", "Error: Could not parse output of ps");
1333                                                 continue;
1334                                         }
1335                                         
1336                                         if (expiredTime % 60 == 0)
1337                                                 Log("SHUTTLE", Form("%s: Checking process. Run time: %d seconds - Memory consumption: %d KB",
1338                                                                 fCurrentDetector.Data(), expiredTime, mem));
1339                                         
1340                                         if (mem > fConfig->GetPPMaxMem())
1341                                         {
1342                                                 TString tmp;
1343                                                 tmp.Form("Process exceeds maximum allowed memory (%d KB > %d KB). Killing...",
1344                                                         mem, fConfig->GetPPMaxMem());
1345                                                 Log("SHUTTLE", tmp);
1346                                                 Log(fCurrentDetector, tmp);
1347         
1348                                                 kill(pid, 9);
1349         
1350                                                 UpdateShuttleStatus(AliShuttleStatus::kPPOutOfMemory);
1351                                                 hasError = kTRUE;
1352         
1353                                                 gSystem->Sleep(1000);
1354                                         }
1355                                 }
1356                         }
1357
1358                         AliInfo(Form("In parent process of %d - %s: Client has terminated.",
1359                                                                 GetCurrentRun(), aDetector->GetName()));
1360
1361                         if (WIFEXITED(status))
1362                         {
1363                                 Int_t returnCode = WEXITSTATUS(status);
1364
1365                                 Log("SHUTTLE", Form("%s: the return code is %d", fCurrentDetector.Data(),
1366                                                                                 returnCode));
1367
1368                                 if (returnCode == 0) hasError = kTRUE;
1369                         }
1370                 }
1371                 else if (pid == 0)
1372                 {
1373                         // client
1374                         AliInfo(Form("In client process of %d - %s", GetCurrentRun(), aDetector->GetName()));
1375
1376                         AliInfo("Redirecting output...");
1377
1378                         if ((freopen(GetLogFileName(fCurrentDetector), "a", stdout)) == 0)
1379                         {
1380                                 Log("SHUTTLE", "Could not freopen stdout");
1381                         }
1382                         else
1383                         {
1384                                 fOutputRedirected = kTRUE;
1385                                 if ((dup2(fileno(stdout), fileno(stderr))) < 0)
1386                                         Log("SHUTTLE", "Could not redirect stderr");
1387                                 
1388                         }
1389                         
1390                         Bool_t success = ProcessCurrentDetector();
1391                         if (success) // Preprocessor finished successfully!
1392                         { 
1393                                 // Update time_processed field in FXS DB
1394                                 if (UpdateTable() == kFALSE)
1395                                         Log("SHUTTLE", Form("Process - %s: Could not update FXS databases!"));
1396
1397                                 // Transfer the data from local storage to main storage (Grid)
1398                                 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
1399                                 if (StoreOCDB() == kFALSE)
1400                                 {
1401                                         AliInfo(Form("\n \t\t\t****** run %d - %s: STORAGE ERROR ****** \n\n",
1402                                                         GetCurrentRun(), aDetector->GetName()));
1403                                         UpdateShuttleStatus(AliShuttleStatus::kStoreError);
1404                                         success = kFALSE;
1405                                 } else {
1406                                         AliInfo(Form("\n \t\t\t****** run %d - %s: DONE ****** \n\n",
1407                                                         GetCurrentRun(), aDetector->GetName()));
1408                                         UpdateShuttleStatus(AliShuttleStatus::kDone);
1409                                         UpdateShuttleLogbook(fCurrentDetector, "DONE");
1410                                 }
1411                         }
1412
1413                         for (UInt_t iSys=0; iSys<3; iSys++)
1414                         {
1415                                 if (fFXSCalled[iSys]) fFXSlist[iSys].Clear();
1416                         }
1417
1418                         AliInfo(Form("Client process of %d - %s is exiting now with %d.",
1419                                                         GetCurrentRun(), aDetector->GetName(), success));
1420
1421                         // the client exits here
1422                         gSystem->Exit(success);
1423
1424                         AliError("We should never get here!!!");
1425                 }
1426         }
1427
1428         AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^* \n",
1429                                                         GetCurrentRun()));
1430
1431         //check if shuttle is done for this run, if so update logbook
1432         TObjArray checkEntryArray;
1433         checkEntryArray.SetOwner(1);
1434         TString whereClause = Form("where run=%d", GetCurrentRun());
1435         if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || checkEntryArray.GetEntries() == 0) {
1436                 Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!",
1437                                                 GetCurrentRun()));
1438                 return hasError == kFALSE;
1439         }
1440
1441         AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*>
1442                                                 (checkEntryArray.At(0));
1443
1444         if (checkEntry)
1445         {
1446                 if (checkEntry->IsDone())
1447                 {
1448                         Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook");
1449                         UpdateShuttleLogbook("shuttle_done");
1450                 }
1451                 else
1452                 {
1453                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1454                         {
1455                                 if (checkEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
1456                                 {
1457                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
1458                                                         checkEntry->GetRun(), GetDetName(iDet)));
1459                                         fFirstUnprocessed[iDet] = kFALSE;
1460                                 }
1461                         }
1462                 }
1463         }
1464
1465         // remove ML instance
1466         delete fMonaLisa;
1467         fMonaLisa = 0;
1468
1469         fLogbookEntry = 0;
1470
1471         return hasError == kFALSE;
1472 }
1473
1474 //______________________________________________________________________________________________
1475 Bool_t AliShuttle::ProcessCurrentDetector()
1476 {
1477         //
1478         // Makes data retrieval just for a specific detector (fCurrentDetector).
1479         // Threre should be a configuration for this detector.
1480
1481         AliInfo(Form("Retrieving values for %s, run %d", fCurrentDetector.Data(), GetCurrentRun()));
1482
1483         if (!CleanReferenceStorage(fCurrentDetector.Data()))
1484                 return kFALSE;
1485
1486         TMap dcsMap;
1487         dcsMap.SetOwner(1);
1488
1489         Bool_t aDCSError = kFALSE;
1490
1491         // call preprocessor
1492         AliPreprocessor* aPreprocessor =
1493                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
1494
1495         aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
1496
1497         Bool_t processDCS = aPreprocessor->ProcessDCS();
1498
1499         if (!processDCS)
1500         {
1501                 Log(fCurrentDetector, "The preprocessor requested to skip the retrieval of DCS values");
1502         }
1503         else if (fTestMode & kSkipDCS)
1504         {
1505                 Log(fCurrentDetector, "In TESTMODE - Skipping DCS processing!");
1506         } 
1507         else if (fTestMode & kErrorDCS)
1508         {
1509                 Log(fCurrentDetector, "In TESTMODE - Simulating DCS error");
1510                 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
1511                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1512                 return kFALSE;
1513         } else {
1514
1515                 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
1516
1517                 TString host(fConfig->GetDCSHost(fCurrentDetector));
1518                 Int_t port = fConfig->GetDCSPort(fCurrentDetector);
1519
1520                 // Retrieval of Aliases
1521                 TObjString* anAlias = 0;
1522                 Int_t iAlias = 0;
1523                 Int_t nTotAliases= ((TMap*)fConfig->GetDCSAliases(fCurrentDetector))->GetEntries();
1524                 TIter iterAliases(fConfig->GetDCSAliases(fCurrentDetector));
1525                 while ((anAlias = (TObjString*) iterAliases.Next()))
1526                 {
1527                         TObjArray *valueSet = new TObjArray();
1528                         valueSet->SetOwner(1);
1529
1530                         iAlias++;
1531                         aDCSError = (GetValueSet(host, port, anAlias->String(), valueSet, kAlias) == 0);
1532
1533                         if(!aDCSError)
1534                         {
1535                                 if (((iAlias-1) % 500) == 0 || iAlias == nTotAliases)
1536                                         AliInfo(Form("Alias %s (%d of %d) - %d values collected",
1537                                                         anAlias->GetName(), iAlias, nTotAliases, valueSet->GetEntriesFast()));
1538                                 dcsMap.Add(anAlias->Clone(), valueSet);
1539                         } else {
1540                                 Log(fCurrentDetector,
1541                                         Form("ProcessCurrentDetector - Error while retrieving alias %s",
1542                                                 anAlias->GetName()));
1543                                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1544                                 dcsMap.DeleteAll();
1545                                 return kFALSE;
1546                         }
1547                 }
1548
1549                 // Retrieval of Data Points
1550                 TObjString* aDP = 0;
1551                 Int_t iDP = 0;
1552                 Int_t nTotDPs= ((TMap*)fConfig->GetDCSDataPoints(fCurrentDetector))->GetEntries();
1553                 TIter iterDP(fConfig->GetDCSDataPoints(fCurrentDetector));
1554                 while ((aDP = (TObjString*) iterDP.Next()))
1555                 {
1556                         TObjArray *valueSet = new TObjArray();
1557                         valueSet->SetOwner(1);
1558                         if (((iDP-1) % 500) == 0 || iDP == nTotDPs)
1559                                 AliInfo(Form("Querying DCS archive: DP %s (%d of %d)",
1560                                                 aDP->GetName(), iDP++, nTotDPs));
1561                         aDCSError = (GetValueSet(host, port, aDP->String(), valueSet, kDP) == 0);
1562
1563                         if(!aDCSError)
1564                         {
1565                                 dcsMap.Add(aDP->Clone(), valueSet);
1566                         } else {
1567                                 Log(fCurrentDetector,
1568                                         Form("ProcessCurrentDetector - Error while retrieving data point %s",
1569                                                 aDP->GetName()));
1570                                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1571                                 dcsMap.DeleteAll();
1572                                 return kFALSE;
1573                         }
1574                 }
1575         }
1576
1577         // DCS Archive DB processing successful. Call Preprocessor!
1578         UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
1579
1580         UInt_t returnValue = aPreprocessor->Process(&dcsMap);
1581
1582         if (returnValue > 0) // Preprocessor error!
1583         {
1584                 Log(fCurrentDetector, Form("Preprocessor failed. Process returned %d.", returnValue));
1585                 UpdateShuttleStatus(AliShuttleStatus::kPPError);
1586                 dcsMap.DeleteAll();
1587                 return kFALSE;
1588         }
1589         
1590         // preprocessor ok!
1591         UpdateShuttleStatus(AliShuttleStatus::kPPDone);
1592         Log(fCurrentDetector, Form("ProcessCurrentDetector - %s preprocessor returned success",
1593                                 fCurrentDetector.Data()));
1594
1595         dcsMap.DeleteAll();
1596
1597         return kTRUE;
1598 }
1599
1600 //______________________________________________________________________________________________
1601 Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
1602                 TObjArray& entries)
1603 {
1604         // Query DAQ's Shuttle logbook and fills detector status object.
1605         // Call QueryRunParameters to query DAQ logbook for run parameters.
1606         //
1607
1608         entries.SetOwner(1);
1609
1610         // check connection, in case connect
1611         if(!Connect(3)) return kFALSE;
1612
1613         TString sqlQuery;
1614         sqlQuery = Form("select * from %s %s order by run", fConfig->GetShuttlelbTable(), whereClause);
1615
1616         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
1617         if (!aResult) {
1618                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
1619                 return kFALSE;
1620         }
1621
1622         AliDebug(2,Form("Query = %s", sqlQuery.Data()));
1623
1624         if(aResult->GetRowCount() == 0) {
1625                 AliInfo("No entries in Shuttle Logbook match request");
1626                 delete aResult;
1627                 return kTRUE;
1628         }
1629
1630         // TODO Check field count!
1631         const UInt_t nCols = 22;
1632         if (aResult->GetFieldCount() != (Int_t) nCols) {
1633                 AliError("Invalid SQL result field number!");
1634                 delete aResult;
1635                 return kFALSE;
1636         }
1637
1638         TSQLRow* aRow;
1639         while ((aRow = aResult->Next())) {
1640                 TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
1641                 Int_t run = runString.Atoi();
1642
1643                 AliShuttleLogbookEntry *entry = QueryRunParameters(run);
1644                 if (!entry)
1645                         continue;
1646
1647                 // loop on detectors
1648                 for(UInt_t ii = 0; ii < nCols; ii++)
1649                         entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii));
1650
1651                 entries.AddLast(entry);
1652                 delete aRow;
1653         }
1654
1655         delete aResult;
1656         return kTRUE;
1657 }
1658
1659 //______________________________________________________________________________________________
1660 AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
1661 {
1662         //
1663         // Retrieve run parameters written in the DAQ logbook and sets them into AliShuttleLogbookEntry object
1664         //
1665
1666         // check connection, in case connect
1667         if (!Connect(3))
1668                 return 0;
1669
1670         TString sqlQuery;
1671         sqlQuery.Form("select * from %s where run=%d", fConfig->GetDAQlbTable(), run);
1672
1673         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
1674         if (!aResult) {
1675                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
1676                 return 0;
1677         }
1678
1679         if (aResult->GetRowCount() == 0) {
1680                 Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run));
1681                 delete aResult;
1682                 return 0;
1683         }
1684
1685         if (aResult->GetRowCount() > 1) {
1686                 AliError(Form("More than one entry in DAQ Logbook for run %d. Skipping", run));
1687                 delete aResult;
1688                 return 0;
1689         }
1690
1691         TSQLRow* aRow = aResult->Next();
1692         if (!aRow)
1693         {
1694                 AliError(Form("Could not retrieve row for run %d. Skipping", run));
1695                 delete aResult;
1696                 return 0;
1697         }
1698
1699         AliShuttleLogbookEntry* entry = new AliShuttleLogbookEntry(run);
1700
1701         for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
1702                 entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
1703
1704         UInt_t startTime = entry->GetStartTime();
1705         UInt_t endTime = entry->GetEndTime();
1706
1707         if (!startTime || !endTime || startTime > endTime) {
1708                 Log("SHUTTLE",
1709                         Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d",
1710                                 run, startTime, endTime));
1711                 delete entry;
1712                 delete aRow;
1713                 delete aResult;
1714                 return 0;
1715         }
1716
1717         delete aRow;
1718         delete aResult;
1719
1720         return entry;
1721 }
1722
1723 //______________________________________________________________________________________________
1724 Bool_t AliShuttle::GetValueSet(const char* host, Int_t port, const char* entry,
1725                                 TObjArray* valueSet, DCSType type)
1726 {
1727         // Retrieve all "entry" data points from the DCS server
1728         // host, port: TSocket connection parameters
1729         // entry: name of the alias or data point
1730         // valueSet: array of retrieved AliDCSValue's
1731         // type: kAlias or kDP
1732
1733         AliDCSClient client(host, port, fTimeout, fRetries);
1734         if (!client.IsConnected())
1735         {
1736                 return kFALSE;
1737         }
1738
1739         Int_t result=0;
1740
1741         if (type == kAlias)
1742         {
1743                 result = client.GetAliasValues(entry,
1744                         GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1745         } else
1746         if (type == kDP)
1747         {
1748                 result = client.GetDPValues(entry,
1749                         GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1750         }
1751
1752         if (result < 0)
1753         {
1754                 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get '%s'! Reason: %s",
1755                         entry, AliDCSClient::GetErrorString(result)));
1756
1757                 if (result == AliDCSClient::fgkServerError)
1758                 {
1759                         Log(fCurrentDetector.Data(), Form("GetValueSet - Server error: %s",
1760                                 client.GetServerError().Data()));
1761                 }
1762
1763                 return kFALSE;
1764         }
1765
1766         return kTRUE;
1767 }
1768
1769 //______________________________________________________________________________________________
1770 const char* AliShuttle::GetFile(Int_t system, const char* detector,
1771                 const char* id, const char* source)
1772 {
1773         // Get calibration file from file exchange servers
1774         // First queris the FXS database for the file name, using the run, detector, id and source info
1775         // then calls RetrieveFile(filename) for actual copy to local disk
1776         // run: current run being processed (given by Logbook entry fLogbookEntry)
1777         // detector: the Preprocessor name
1778         // id: provided as a parameter by the Preprocessor
1779         // source: provided by the Preprocessor through GetFileSources function
1780
1781         // check if test mode should simulate a FXS error
1782         if (fTestMode & kErrorFXSFiles)
1783         {
1784                 Log(detector, Form("GetFile - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
1785                 return 0;
1786         }
1787         
1788         // check connection, in case connect
1789         if (!Connect(system))
1790         {
1791                 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
1792                 return 0;
1793         }
1794
1795         // Query preparation
1796         TString sourceName(source);
1797         Int_t nFields = 3;
1798         TString sqlQueryStart = Form("select filePath,size,fileChecksum from %s where",
1799                                                                 fConfig->GetFXSdbTable(system));
1800         TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1801                                                                 GetCurrentRun(), detector, id);
1802
1803         if (system == kDAQ)
1804         {
1805                 whereClause += Form(" and DAQsource=\"%s\"", source);
1806         }
1807         else if (system == kDCS)
1808         {
1809                 sourceName="none";
1810         }
1811         else if (system == kHLT)
1812         {
1813                 whereClause += Form(" and DDLnumbers=\"%s\"", source);
1814                 nFields = 3;
1815         }
1816
1817         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1818
1819         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1820
1821         // Query execution
1822         TSQLResult* aResult = 0;
1823         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
1824         if (!aResult) {
1825                 Log(detector, Form("GetFileName - Can't execute SQL query to %s database for: id = %s, source = %s",
1826                                 GetSystemName(system), id, sourceName.Data()));
1827                 return 0;
1828         }
1829
1830         if(aResult->GetRowCount() == 0)
1831         {
1832                 Log(detector,
1833                         Form("GetFileName - No entry in %s FXS db for: id = %s, source = %s",
1834                                 GetSystemName(system), id, sourceName.Data()));
1835                 delete aResult;
1836                 return 0;
1837         }
1838
1839         if (aResult->GetRowCount() > 1) {
1840                 Log(detector,
1841                         Form("GetFileName - More than one entry in %s FXS db for: id = %s, source = %s",
1842                                 GetSystemName(system), id, sourceName.Data()));
1843                 delete aResult;
1844                 return 0;
1845         }
1846
1847         if (aResult->GetFieldCount() != nFields) {
1848                 Log(detector,
1849                         Form("GetFileName - Wrong field count in %s FXS db for: id = %s, source = %s",
1850                                 GetSystemName(system), id, sourceName.Data()));
1851                 delete aResult;
1852                 return 0;
1853         }
1854
1855         TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
1856
1857         if (!aRow){
1858                 Log(detector, Form("GetFileName - Empty set result in %s FXS db from query: id = %s, source = %s",
1859                                 GetSystemName(system), id, sourceName.Data()));
1860                 delete aResult;
1861                 return 0;
1862         }
1863
1864         TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
1865         TString fileSize(aRow->GetField(1), aRow->GetFieldLength(1));
1866         TString fileChecksum(aRow->GetField(2), aRow->GetFieldLength(2));
1867
1868         delete aResult;
1869         delete aRow;
1870
1871         AliDebug(2, Form("filePath = %s; size = %s, fileChecksum = %s",
1872                                 filePath.Data(), fileSize.Data(), fileChecksum.Data()));
1873
1874         // retrieved file is renamed to make it unique
1875         TString localFileName = Form("%s_%s_%d_%s_%s.shuttle",
1876                                         GetSystemName(system), detector, GetCurrentRun(), id, sourceName.Data());
1877
1878
1879         // file retrieval from FXS
1880         UInt_t nRetries = 0;
1881         UInt_t maxRetries = 3;
1882         Bool_t result = kFALSE;
1883
1884         // copy!! if successful TSystem::Exec returns 0
1885         while(nRetries++ < maxRetries) {
1886                 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
1887                 result = RetrieveFile(system, filePath.Data(), localFileName.Data());
1888                 if(!result)
1889                 {
1890                         Log(detector, Form("GetFileName - Copy of file %s from %s FXS failed",
1891                                         filePath.Data(), GetSystemName(system)));
1892                         continue;
1893                 } else {
1894                         AliInfo(Form("File %s copied from %s FXS into %s/%s",
1895                                                 filePath.Data(), GetSystemName(system),
1896                                                 GetShuttleTempDir(), localFileName.Data()));
1897                 }
1898
1899                 if (fileChecksum.Length()>0)
1900                 {
1901                         // compare md5sum of local file with the one stored in the FXS DB
1902                         Int_t md5Comp = gSystem->Exec(Form("md5sum %s/%s |grep %s 2>&1 > /dev/null",
1903                                                 GetShuttleTempDir(), localFileName.Data(), fileChecksum.Data()));
1904
1905                         if (md5Comp != 0)
1906                         {
1907                                 Log(detector, Form("GetFileName - md5sum of file %s does not match with local copy!",
1908                                                         filePath.Data()));
1909                                 result = kFALSE;
1910                                 continue;
1911                         }
1912                 } else {
1913                         Log(fCurrentDetector, Form("GetFile - md5sum of file %s not set in %s database, skipping comparison",
1914                                                         filePath.Data(), GetSystemName(system)));
1915                 }
1916                 if (result) break;
1917         }
1918
1919         if(!result) return 0;
1920
1921         fFXSCalled[system]=kTRUE;
1922         TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, sourceName.Data()));
1923         fFXSlist[system].Add(fileParams);
1924
1925         static TString fullLocalFileName;
1926         fullLocalFileName = TString::Format("%s/%s", GetShuttleTempDir(), localFileName.Data());
1927
1928         AliInfo(Form("fullLocalFileName = %s", fullLocalFileName.Data()));
1929
1930         return fullLocalFileName.Data();
1931
1932 }
1933
1934 //______________________________________________________________________________________________
1935 Bool_t AliShuttle::RetrieveFile(UInt_t system, const char* fxsFileName, const char* localFileName)
1936 {
1937         //
1938         // Copies file from FXS to local Shuttle machine
1939         //
1940
1941         // check temp directory: trying to cd to temp; if it does not exist, create it
1942         AliDebug(2, Form("Copy file %s from %s FXS into %s/%s",
1943                         GetSystemName(system), fxsFileName, GetShuttleTempDir(), localFileName));
1944
1945         void* dir = gSystem->OpenDirectory(GetShuttleTempDir());
1946         if (dir == NULL) {
1947                 if (gSystem->mkdir(GetShuttleTempDir(), kTRUE)) {
1948                         AliError(Form("Can't open directory <%s>", GetShuttleTempDir()));
1949                         return kFALSE;
1950                 }
1951
1952         } else {
1953                 gSystem->FreeDirectory(dir);
1954         }
1955
1956         TString baseFXSFolder;
1957         if (system == kDAQ)
1958         {
1959                 baseFXSFolder = "FES/";
1960         }
1961         else if (system == kDCS)
1962         {
1963                 baseFXSFolder = "";
1964         }
1965         else if (system == kHLT)
1966         {
1967                 baseFXSFolder = "~/";
1968         }
1969
1970
1971         TString command = Form("scp -oPort=%d -2 %s@%s:%s%s %s/%s",
1972                 fConfig->GetFXSPort(system),
1973                 fConfig->GetFXSUser(system),
1974                 fConfig->GetFXSHost(system),
1975                 baseFXSFolder.Data(),
1976                 fxsFileName,
1977                 GetShuttleTempDir(),
1978                 localFileName);
1979
1980         AliDebug(2, Form("%s",command.Data()));
1981
1982         Bool_t result = (gSystem->Exec(command.Data()) == 0);
1983
1984         return result;
1985 }
1986
1987 //______________________________________________________________________________________________
1988 TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
1989 {
1990         //
1991         // Get sources producing the condition file Id from file exchange servers
1992         // if id is NULL all sources are returned (distinct)
1993         //
1994         
1995         // check if test mode should simulate a FXS error
1996         if (fTestMode & kErrorFXSSources)
1997         {
1998                 Log(detector, Form("GetFileSources - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
1999                 return 0;
2000         }
2001
2002
2003         if (system == kDCS)
2004         {
2005                 AliError("DCS system has only one source of data!");
2006                 return NULL;
2007         }
2008
2009         // check connection, in case connect
2010         if (!Connect(system))
2011         {
2012                 Log(detector, Form("GetFileSources - Couldn't connect to %s FXS database", GetSystemName(system)));
2013                 return NULL;
2014         }
2015
2016         TString sourceName = 0;
2017         if (system == kDAQ)
2018         {
2019                 sourceName = "DAQsource";
2020         } else if (system == kHLT)
2021         {
2022                 sourceName = "DDLnumbers";
2023         }
2024
2025         TString sqlQueryStart = Form("select distinct %s from %s where", sourceName.Data(), fConfig->GetFXSdbTable(system));
2026         TString whereClause = Form("run=%d and detector=\"%s\"",
2027                                 GetCurrentRun(), detector);
2028         if (id)
2029                 whereClause += Form(" and fileId=\"%s\"", id);
2030         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2031
2032         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2033
2034         // Query execution
2035         TSQLResult* aResult;
2036         aResult = fServer[system]->Query(sqlQuery);
2037         if (!aResult) {
2038                 Log(detector, Form("GetFileSources - Can't execute SQL query to %s database for id: %s",
2039                                 GetSystemName(system), id));
2040                 return 0;
2041         }
2042
2043         TList *list = new TList();
2044         list->SetOwner(1);
2045         
2046         if (aResult->GetRowCount() == 0)
2047         {
2048                 Log(detector,
2049                         Form("GetFileSources - No entry in %s FXS table for id: %s", GetSystemName(system), id));
2050                 delete aResult;
2051                 return list;
2052         }
2053
2054         TSQLRow* aRow;
2055
2056         while ((aRow = aResult->Next()))
2057         {
2058
2059                 TString source(aRow->GetField(0), aRow->GetFieldLength(0));
2060                 AliDebug(2, Form("%s = %s", sourceName.Data(), source.Data()));
2061                 list->Add(new TObjString(source));
2062                 delete aRow;
2063         }
2064
2065         delete aResult;
2066
2067         return list;
2068 }
2069
2070 //______________________________________________________________________________________________
2071 TList* AliShuttle::GetFileIDs(Int_t system, const char* detector, const char* source)
2072 {
2073         //
2074         // Get all ids of condition files produced by a given source from file exchange servers
2075         //
2076         
2077         // check if test mode should simulate a FXS error
2078         if (fTestMode & kErrorFXSSources)
2079         {
2080                 Log(detector, Form("GetFileIDs - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2081                 return 0;
2082         }
2083
2084         // check connection, in case connect
2085         if (!Connect(system))
2086         {
2087                 Log(detector, Form("GetFileIDs - Couldn't connect to %s FXS database", GetSystemName(system)));
2088                 return NULL;
2089         }
2090
2091         TString sourceName = 0;
2092         if (system == kDAQ)
2093         {
2094                 sourceName = "DAQsource";
2095         } else if (system == kHLT)
2096         {
2097                 sourceName = "DDLnumbers";
2098         }
2099
2100         TString sqlQueryStart = Form("select fileId from %s where", fConfig->GetFXSdbTable(system));
2101         TString whereClause = Form("run=%d and detector=\"%s\"",
2102                                 GetCurrentRun(), detector);
2103         if (sourceName.Length() > 0 && source)
2104                 whereClause += Form(" and %s=\"%s\"", sourceName.Data(), source);
2105         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2106
2107         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2108
2109         // Query execution
2110         TSQLResult* aResult;
2111         aResult = fServer[system]->Query(sqlQuery);
2112         if (!aResult) {
2113                 Log(detector, Form("GetFileIDs - Can't execute SQL query to %s database for source: %s",
2114                                 GetSystemName(system), source));
2115                 return 0;
2116         }
2117
2118         TList *list = new TList();
2119         list->SetOwner(1);
2120         
2121         if (aResult->GetRowCount() == 0)
2122         {
2123                 Log(detector,
2124                         Form("GetFileIDs - No entry in %s FXS table for source: %s", GetSystemName(system), source));
2125                 delete aResult;
2126                 return list;
2127         }
2128
2129         TSQLRow* aRow;
2130
2131         while ((aRow = aResult->Next()))
2132         {
2133
2134                 TString id(aRow->GetField(0), aRow->GetFieldLength(0));
2135                 AliDebug(2, Form("fileId = %s", id.Data()));
2136                 list->Add(new TObjString(id));
2137                 delete aRow;
2138         }
2139
2140         delete aResult;
2141
2142         return list;
2143 }
2144
2145 //______________________________________________________________________________________________
2146 Bool_t AliShuttle::Connect(Int_t system)
2147 {
2148         // Connect to MySQL Server of the system's FXS MySQL databases
2149         // DAQ Logbook, Shuttle Logbook and DAQ FXS db are on the same host
2150         //
2151
2152         // check connection: if already connected return
2153         if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
2154
2155         TString dbHost, dbUser, dbPass, dbName;
2156
2157         if (system < 3) // FXS db servers
2158         {
2159                 dbHost = Form("mysql://%s:%d", fConfig->GetFXSdbHost(system), fConfig->GetFXSdbPort(system));
2160                 dbUser = fConfig->GetFXSdbUser(system);
2161                 dbPass = fConfig->GetFXSdbPass(system);
2162                 dbName =   fConfig->GetFXSdbName(system);
2163         } else { // Run & Shuttle logbook servers
2164         // TODO Will the Shuttle logbook server be the same as the Run logbook server ???
2165                 dbHost = Form("mysql://%s:%d", fConfig->GetDAQlbHost(), fConfig->GetDAQlbPort());
2166                 dbUser = fConfig->GetDAQlbUser();
2167                 dbPass = fConfig->GetDAQlbPass();
2168                 dbName =   fConfig->GetDAQlbDB();
2169         }
2170
2171         fServer[system] = TSQLServer::Connect(dbHost.Data(), dbUser.Data(), dbPass.Data());
2172         if (!fServer[system] || !fServer[system]->IsConnected()) {
2173                 if(system < 3)
2174                 {
2175                 AliError(Form("Can't establish connection to FXS database for %s",
2176                                         AliShuttleInterface::GetSystemName(system)));
2177                 } else {
2178                 AliError("Can't establish connection to Run logbook.");
2179                 }
2180                 if(fServer[system]) delete fServer[system];
2181                 return kFALSE;
2182         }
2183
2184         // Get tables
2185         TSQLResult* aResult=0;
2186         switch(system){
2187                 case kDAQ:
2188                         aResult = fServer[kDAQ]->GetTables(dbName.Data());
2189                         break;
2190                 case kDCS:
2191                         aResult = fServer[kDCS]->GetTables(dbName.Data());
2192                         break;
2193                 case kHLT:
2194                         aResult = fServer[kHLT]->GetTables(dbName.Data());
2195                         break;
2196                 default:
2197                         aResult = fServer[3]->GetTables(dbName.Data());
2198                         break;
2199         }
2200
2201         delete aResult;
2202         return kTRUE;
2203 }
2204
2205 //______________________________________________________________________________________________
2206 Bool_t AliShuttle::UpdateTable()
2207 {
2208         //
2209         // Update FXS table filling time_processed field in all rows corresponding to current run and detector
2210         //
2211
2212         Bool_t result = kTRUE;
2213
2214         for (UInt_t system=0; system<3; system++)
2215         {
2216                 if(!fFXSCalled[system]) continue;
2217
2218                 // check connection, in case connect
2219                 if (!Connect(system))
2220                 {
2221                         Log(fCurrentDetector, Form("UpdateTable - Couldn't connect to %s FXS database", GetSystemName(system)));
2222                         result = kFALSE;
2223                         continue;
2224                 }
2225
2226                 TTimeStamp now; // now
2227
2228                 // Loop on FXS list entries
2229                 TIter iter(&fFXSlist[system]);
2230                 TObjString *aFXSentry=0;
2231                 while ((aFXSentry = dynamic_cast<TObjString*> (iter.Next())))
2232                 {
2233                         TString aFXSentrystr = aFXSentry->String();
2234                         TObjArray *aFXSarray = aFXSentrystr.Tokenize("#!?!#");
2235                         if (!aFXSarray || aFXSarray->GetEntries() != 2 )
2236                         {
2237                                 Log(fCurrentDetector, Form("UpdateTable - error updating %s FXS entry. Check string: <%s>",
2238                                         GetSystemName(system), aFXSentrystr.Data()));
2239                                 if(aFXSarray) delete aFXSarray;
2240                                 result = kFALSE;
2241                                 continue;
2242                         }
2243                         const char* fileId = ((TObjString*) aFXSarray->At(0))->GetName();
2244                         const char* source = ((TObjString*) aFXSarray->At(1))->GetName();
2245
2246                         TString whereClause;
2247                         if (system == kDAQ)
2248                         {
2249                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
2250                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
2251                         }
2252                         else if (system == kDCS)
2253                         {
2254                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\";",
2255                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId);
2256                         }
2257                         else if (system == kHLT)
2258                         {
2259                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\";",
2260                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
2261                         }
2262
2263                         delete aFXSarray;
2264
2265                         TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
2266                                                                 now.GetSec(), whereClause.Data());
2267
2268                         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2269
2270                         // Query execution
2271                         TSQLResult* aResult;
2272                         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2273                         if (!aResult)
2274                         {
2275                                 Log(fCurrentDetector, Form("UpdateTable - %s db: can't execute SQL query <%s>",
2276                                                                 GetSystemName(system), sqlQuery.Data()));
2277                                 result = kFALSE;
2278                                 continue;
2279                         }
2280                         delete aResult;
2281                 }
2282         }
2283
2284         return result;
2285 }
2286
2287 //______________________________________________________________________________________________
2288 Bool_t AliShuttle::UpdateTableFailCase()
2289 {
2290         // Update FXS table filling time_processed field in all rows corresponding to current run and detector
2291         // this is called in case the preprocessor is declared failed for the current run, because
2292         // the fields are updated only in case of success
2293
2294         Bool_t result = kTRUE;
2295
2296         for (UInt_t system=0; system<3; system++)
2297         {
2298                 // check connection, in case connect
2299                 if (!Connect(system))
2300                 {
2301                         Log(fCurrentDetector, Form("UpdateTableFailCase - Couldn't connect to %s FXS database",
2302                                                         GetSystemName(system)));
2303                         result = kFALSE;
2304                         continue;
2305                 }
2306
2307                 TTimeStamp now; // now
2308
2309                 // Loop on FXS list entries
2310
2311                 TString whereClause = Form("where run=%d and detector=\"%s\";",
2312                                                 GetCurrentRun(), fCurrentDetector.Data());
2313
2314
2315                 TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
2316                                                         now.GetSec(), whereClause.Data());
2317
2318                 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2319
2320                 // Query execution
2321                 TSQLResult* aResult;
2322                 aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2323                 if (!aResult)
2324                 {
2325                         Log(fCurrentDetector, Form("UpdateTableFailCase - %s db: can't execute SQL query <%s>",
2326                                                         GetSystemName(system), sqlQuery.Data()));
2327                         result = kFALSE;
2328                         continue;
2329                 }
2330                 delete aResult;
2331         }
2332
2333         return result;
2334 }
2335
2336 //______________________________________________________________________________________________
2337 Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
2338 {
2339         //
2340         // Update Shuttle logbook filling detector or shuttle_done column
2341         // ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
2342         //
2343
2344         // check connection, in case connect
2345         if(!Connect(3)){
2346                 Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook.");
2347                 return kFALSE;
2348         }
2349
2350         TString detName(detector);
2351         TString setClause;
2352         if(detName == "shuttle_done")
2353         {
2354                 setClause = "set shuttle_done=1";
2355
2356                 // Send the information to ML
2357                 TMonaLisaText  mlStatus("SHUTTLE_status", "Done");
2358
2359                 TList mlList;
2360                 mlList.Add(&mlStatus);
2361
2362                 fMonaLisa->SendParameters(&mlList);
2363         } else {
2364                 TString statusStr(status);
2365                 if(statusStr.Contains("done", TString::kIgnoreCase) ||
2366                    statusStr.Contains("failed", TString::kIgnoreCase)){
2367                         setClause = Form("set %s=\"%s\"", detector, status);
2368                 } else {
2369                         Log("SHUTTLE",
2370                                 Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s",
2371                                         status, detector));
2372                         return kFALSE;
2373                 }
2374         }
2375
2376         TString whereClause = Form("where run=%d", GetCurrentRun());
2377
2378         TString sqlQuery = Form("update %s %s %s",
2379                                         fConfig->GetShuttlelbTable(), setClause.Data(), whereClause.Data());
2380
2381         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2382
2383         // Query execution
2384         TSQLResult* aResult;
2385         aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
2386         if (!aResult) {
2387                 Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data()));
2388                 return kFALSE;
2389         }
2390         delete aResult;
2391
2392         return kTRUE;
2393 }
2394
2395 //______________________________________________________________________________________________
2396 Int_t AliShuttle::GetCurrentRun() const
2397 {
2398         //
2399         // Get current run from logbook entry
2400         //
2401
2402         return fLogbookEntry ? fLogbookEntry->GetRun() : -1;
2403 }
2404
2405 //______________________________________________________________________________________________
2406 UInt_t AliShuttle::GetCurrentStartTime() const
2407 {
2408         //
2409         // get current start time
2410         //
2411
2412         return fLogbookEntry ? fLogbookEntry->GetStartTime() : 0;
2413 }
2414
2415 //______________________________________________________________________________________________
2416 UInt_t AliShuttle::GetCurrentEndTime() const
2417 {
2418         //
2419         // get current end time from logbook entry
2420         //
2421
2422         return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
2423 }
2424
2425 //______________________________________________________________________________________________
2426 void AliShuttle::Log(const char* detector, const char* message)
2427 {
2428         //
2429         // Fill log string with a message
2430         //
2431
2432         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
2433         if (dir == NULL) {
2434                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE)) {
2435                         AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
2436                         return;
2437                 }
2438
2439         } else {
2440                 gSystem->FreeDirectory(dir);
2441         }
2442
2443         TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
2444         if (GetCurrentRun() >= 0) 
2445                 toLog += Form("run %d - ", GetCurrentRun());
2446         toLog += Form("%s", message);
2447
2448         AliInfo(toLog.Data());
2449         
2450         // if we redirect the log output already to the file, leave here
2451         if (fOutputRedirected && strcmp(detector, "SHUTTLE") != 0)
2452                 return;
2453
2454         TString fileName = GetLogFileName(detector);
2455         
2456         gSystem->ExpandPathName(fileName);
2457
2458         ofstream logFile;
2459         logFile.open(fileName, ofstream::out | ofstream::app);
2460
2461         if (!logFile.is_open()) {
2462                 AliError(Form("Could not open file %s", fileName.Data()));
2463                 return;
2464         }
2465
2466         logFile << toLog.Data() << "\n";
2467
2468         logFile.close();
2469 }
2470
2471 //______________________________________________________________________________________________
2472 TString AliShuttle::GetLogFileName(const char* detector) const
2473 {
2474         // 
2475         // returns the name of the log file for a given sub detector
2476         //
2477         
2478         TString fileName;
2479         
2480         if (GetCurrentRun() >= 0) 
2481                 fileName.Form("%s/%s_%d.log", GetShuttleLogDir(), detector, GetCurrentRun());
2482         else
2483                 fileName.Form("%s/%s.log", GetShuttleLogDir(), detector);
2484
2485         return fileName;
2486 }
2487
2488 //______________________________________________________________________________________________
2489 Bool_t AliShuttle::Collect(Int_t run)
2490 {
2491         //
2492         // Collects conditions data for all UNPROCESSED run written to DAQ LogBook in case of run = -1 (default)
2493         // If a dedicated run is given this run is processed
2494         //
2495         // In operational mode, this is the Shuttle function triggered by the EOR signal.
2496         //
2497
2498         if (run == -1)
2499                 Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs");
2500         else
2501                 Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run));
2502
2503         SetLastAction("Starting");
2504
2505         TString whereClause("where shuttle_done=0");
2506         if (run != -1)
2507                 whereClause += Form(" and run=%d", run);
2508
2509         TObjArray shuttleLogbookEntries;
2510         if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries))
2511         {
2512                 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
2513                 return kFALSE;
2514         }
2515
2516         if (shuttleLogbookEntries.GetEntries() == 0)
2517         {
2518                 if (run == -1)
2519                         Log("SHUTTLE","Collect - Found no UNPROCESSED runs in Shuttle logbook");
2520                 else
2521                         Log("SHUTTLE", Form("Collect - Run %d is already DONE "
2522                                                 "or it does not exist in Shuttle logbook", run));
2523                 return kTRUE;
2524         }
2525
2526         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
2527                 fFirstUnprocessed[iDet] = kTRUE;
2528
2529         if (run != -1)
2530         {
2531                 // query Shuttle logbook for earlier runs, check if some detectors are unprocessed,
2532                 // flag them into fFirstUnprocessed array
2533                 TString whereClause(Form("where shuttle_done=0 and run < %d", run));
2534                 TObjArray tmpLogbookEntries;
2535                 if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries))
2536                 {
2537                         Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
2538                         return kFALSE;
2539                 }
2540
2541                 TIter iter(&tmpLogbookEntries);
2542                 AliShuttleLogbookEntry* anEntry = 0;
2543                 while ((anEntry = dynamic_cast<AliShuttleLogbookEntry*> (iter.Next())))
2544                 {
2545                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
2546                         {
2547                                 if (anEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
2548                                 {
2549                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
2550                                                         anEntry->GetRun(), GetDetName(iDet)));
2551                                         fFirstUnprocessed[iDet] = kFALSE;
2552                                 }
2553                         }
2554
2555                 }
2556
2557         }
2558
2559         if (!RetrieveConditionsData(shuttleLogbookEntries))
2560         {
2561                 Log("SHUTTLE", "Collect - Process of at least one run failed");
2562                 return kFALSE;
2563         }
2564
2565         Log("SHUTTLE", "Collect - Requested run(s) successfully processed");
2566         return kTRUE;
2567 }
2568
2569 //______________________________________________________________________________________________
2570 Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries)
2571 {
2572         //
2573         // Retrieve conditions data for all runs that aren't processed yet
2574         //
2575
2576         Bool_t hasError = kFALSE;
2577
2578         TIter iter(&dateEntries);
2579         AliShuttleLogbookEntry* anEntry;
2580
2581         while ((anEntry = (AliShuttleLogbookEntry*) iter.Next())){
2582                 if (!Process(anEntry)){
2583                         hasError = kTRUE;
2584                 }
2585
2586                 // clean SHUTTLE temp directory
2587                 TString filename = Form("%s/*.shuttle", GetShuttleTempDir());
2588                 RemoveFile(filename.Data());
2589         }
2590
2591         return hasError == kFALSE;
2592 }
2593
2594 //______________________________________________________________________________________________
2595 ULong_t AliShuttle::GetTimeOfLastAction() const
2596 {
2597         //
2598         // Gets time of last action
2599         //
2600
2601         ULong_t tmp;
2602
2603         fMonitoringMutex->Lock();
2604
2605         tmp = fLastActionTime;
2606
2607         fMonitoringMutex->UnLock();
2608
2609         return tmp;
2610 }
2611
2612 //______________________________________________________________________________________________
2613 const TString AliShuttle::GetLastAction() const
2614 {
2615         //
2616         // returns a string description of the last action
2617         //
2618
2619         TString tmp;
2620
2621         fMonitoringMutex->Lock();
2622         
2623         tmp = fLastAction;
2624         
2625         fMonitoringMutex->UnLock();
2626
2627         return tmp;
2628 }
2629
2630 //______________________________________________________________________________________________
2631 void AliShuttle::SetLastAction(const char* action)
2632 {
2633         //
2634         // updates the monitoring variables
2635         //
2636
2637         fMonitoringMutex->Lock();
2638
2639         fLastAction = action;
2640         fLastActionTime = time(0);
2641         
2642         fMonitoringMutex->UnLock();
2643 }
2644
2645 //______________________________________________________________________________________________
2646 const char* AliShuttle::GetRunParameter(const char* param)
2647 {
2648         //
2649         // returns run parameter read from DAQ logbook
2650         //
2651
2652         if(!fLogbookEntry) {
2653                 AliError("No logbook entry!");
2654                 return 0;
2655         }
2656
2657         return fLogbookEntry->GetRunParameter(param);
2658 }
2659
2660 //______________________________________________________________________________________________
2661 AliCDBEntry* AliShuttle::GetFromOCDB(const char* detector, const AliCDBPath& path)
2662 {
2663         //
2664         // returns object from OCDB valid for current run
2665         //
2666
2667         if (fTestMode & kErrorOCDB)
2668         {
2669                 Log(detector, "GetFromOCDB - In TESTMODE - Simulating error with OCDB");
2670                 return 0;
2671         }
2672         
2673         AliCDBStorage *sto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
2674         if (!sto)
2675         {
2676                 Log(detector, "GetFromOCDB - Cannot activate main OCDB for query!");
2677                 return 0;
2678         }
2679
2680         return dynamic_cast<AliCDBEntry*> (sto->Get(path, GetCurrentRun()));
2681 }
2682
2683 //______________________________________________________________________________________________
2684 Bool_t AliShuttle::SendMail()
2685 {
2686         //
2687         // sends a mail to the subdetector expert in case of preprocessor error
2688         //
2689         
2690         if (fTestMode != kNone)
2691                 return kTRUE;
2692
2693         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
2694         if (dir == NULL)
2695         {
2696                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
2697                 {
2698                         AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
2699                         return kFALSE;
2700                 }
2701
2702         } else {
2703                 gSystem->FreeDirectory(dir);
2704         }
2705
2706         TString bodyFileName;
2707         bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
2708         gSystem->ExpandPathName(bodyFileName);
2709
2710         ofstream mailBody;
2711         mailBody.open(bodyFileName, ofstream::out);
2712
2713         if (!mailBody.is_open())
2714         {
2715                 AliError(Form("Could not open mail body file %s", bodyFileName.Data()));
2716                 return kFALSE;
2717         }
2718
2719         TString to="";
2720         TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
2721         TObjString *anExpert=0;
2722         while ((anExpert = (TObjString*) iterExperts.Next()))
2723         {
2724                 to += Form("%s,", anExpert->GetName());
2725         }
2726         to.Remove(to.Length()-1);
2727         AliDebug(2, Form("to: %s",to.Data()));
2728
2729         if (to.IsNull()) {
2730                 AliInfo("List of detector responsibles not yet set!");
2731                 return kFALSE;
2732         }
2733
2734         TString cc="alberto.colla@cern.ch";
2735
2736         TString subject = Form("%s Shuttle preprocessor FAILED in run %d !",
2737                                 fCurrentDetector.Data(), GetCurrentRun());
2738         AliDebug(2, Form("subject: %s", subject.Data()));
2739
2740         TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
2741         body += Form("SHUTTLE just detected that your preprocessor "
2742                         "failed processing run %d!!\n\n", GetCurrentRun());
2743         body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n", fCurrentDetector.Data());
2744         body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
2745         body += Form("Find the %s log for the current run on \n\n"
2746                 "\thttp://pcalishuttle01.cern.ch:8880/logs/%s_%d.log \n\n", 
2747                 fCurrentDetector.Data(), fCurrentDetector.Data(), GetCurrentRun());
2748         body += Form("The last 10 lines of %s log file are following:\n\n");
2749
2750         AliDebug(2, Form("Body begin: %s", body.Data()));
2751
2752         mailBody << body.Data();
2753         mailBody.close();
2754         mailBody.open(bodyFileName, ofstream::out | ofstream::app);
2755
2756         TString logFileName = Form("%s/%s_%d.log", GetShuttleLogDir(), fCurrentDetector.Data(), GetCurrentRun());
2757         TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
2758         if (gSystem->Exec(tailCommand.Data()))
2759         {
2760                 mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
2761         }
2762
2763         TString endBody = Form("------------------------------------------------------\n\n");
2764         endBody += Form("In case of problems please contact the SHUTTLE core team.\n\n");
2765         endBody += "Please do not answer this message directly, it is automatically generated.\n\n";
2766         endBody += "Greetings,\n\n \t\t\tthe SHUTTLE\n";
2767
2768         AliDebug(2, Form("Body end: %s", endBody.Data()));
2769
2770         mailBody << endBody.Data();
2771
2772         mailBody.close();
2773
2774         // send mail!
2775         TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
2776                                                 subject.Data(),
2777                                                 cc.Data(),
2778                                                 to.Data(),
2779                                                 bodyFileName.Data());
2780         AliDebug(2, Form("mail command: %s", mailCommand.Data()));
2781
2782         Bool_t result = gSystem->Exec(mailCommand.Data());
2783
2784         return result == 0;
2785 }
2786
2787 //______________________________________________________________________________________________
2788 const char* AliShuttle::GetRunType()
2789 {
2790         //
2791         // returns run type read from "run type" logbook
2792         //
2793
2794         if(!fLogbookEntry) {
2795                 AliError("No logbook entry!");
2796                 return 0;
2797         }
2798
2799         return fLogbookEntry->GetRunType();
2800 }
2801
2802 //______________________________________________________________________________________________
2803 void AliShuttle::SetShuttleTempDir(const char* tmpDir)
2804 {
2805         //
2806         // sets Shuttle temp directory
2807         //
2808
2809         fgkShuttleTempDir = gSystem->ExpandPathName(tmpDir);
2810 }
2811
2812 //______________________________________________________________________________________________
2813 void AliShuttle::SetShuttleLogDir(const char* logDir)
2814 {
2815         //
2816         // sets Shuttle log directory
2817         //
2818
2819         fgkShuttleLogDir = gSystem->ExpandPathName(logDir);
2820 }