]> git.uio.no Git - u/mrichter/AliRoot.git/blob - SHUTTLE/AliShuttle.cxx
HLT updated; missing map bug removed.
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttle.cxx
1 /**************************************************************************
2  * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
3  *                                                                        *
4  * Author: The ALICE Off-line Project.                                    *
5  * Contributors are mentioned in the code where appropriate.              *
6  *                                                                        *
7  * Permission to use, copy, modify and distribute this software and its   *
8  * documentation strictly for non-commercial purposes is hereby granted   *
9  * without fee, provided that the above copyright notice appears in all   *
10  * copies and that both the copyright notice and this permission notice   *
11  * appear in the supporting documentation. The authors make no claims     *
12  * about the suitability of this software for any purpose. It is          *
13  * provided "as is" without express or implied warranty.                  *
14  **************************************************************************/
15
16 /*
17 $Log$
18 Revision 1.46  2007/06/09 13:01:09  jgrosseo
19 Switching to retrieval of several DCS DPs at a time (multiDPrequest)
20
21 Revision 1.45  2007/05/30 06:35:20  jgrosseo
22 Adding functionality to the Shuttle/TestShuttle:
23 o) Function to retrieve list of sources from a given system (GetFileSources with id=0)
24 o) Function to retrieve list of IDs for a given source      (GetFileIDs)
25 These functions are needed for dealing with the tag files that are saved for the GRP preprocessor
26 Example code has been added to the TestProcessor in TestShuttle
27
28 Revision 1.44  2007/05/11 16:09:32  acolla
29 Reference files for ITS, MUON and PHOS are now stored in OfflineDetName/OnlineDetName/run_...
30 example: ITS/SPD/100_filename.root
31
32 Revision 1.43  2007/05/10 09:59:51  acolla
33 Various bug fixes in StoreRefFilesToGrid; Cleaning of reference storage before processing detector (CleanReferenceStorage)
34
35 Revision 1.42  2007/05/03 08:01:39  jgrosseo
36 typo in last commit :-(
37
38 Revision 1.41  2007/05/03 08:00:48  jgrosseo
39 fixing log message when pp want to skip dcs value retrieval
40
41 Revision 1.40  2007/04/27 07:06:48  jgrosseo
42 GetFileSources returns empty list in case of no files, but successful query
43 No mails sent in testmode
44
45 Revision 1.39  2007/04/17 12:43:57  acolla
46 Correction in StoreOCDB; change of text in mail to detector expert
47
48 Revision 1.38  2007/04/12 08:26:18  jgrosseo
49 updated comment
50
51 Revision 1.37  2007/04/10 16:53:14  jgrosseo
52 redirecting sub detector stdout, stderr to sub detector log file
53
54 Revision 1.35  2007/04/04 16:26:38  acolla
55 1. Re-organization of function calls in TestPreprocessor to make it more meaningful.
56 2. Added missing dependency in test preprocessors.
57 3. in AliShuttle.cxx: processing time and memory consumption info on a single line.
58
59 Revision 1.34  2007/04/04 10:33:36  jgrosseo
60 1) Storing of files to the Grid is now done _after_ your preprocessors succeeded. This is transparent, which means that you can still use the same functions (Store, StoreReferenceData) to store files to the Grid. However, the Shuttle first stores them locally and transfers them after the preprocessor finished. The return code of these two functions has changed from UInt_t to Bool_t which gives you the success of the storing.
61 In case of an error with the Grid, the Shuttle will retry the storing later, the preprocessor does not need to be run again.
62
63 2) The meaning of the return code of the preprocessor has changed. 0 is now success and any other value means failure. This value is stored in the log and you can use it to keep details about the error condition.
64
65 3) New function StoreReferenceFile to _directly_ store a file (without opening it) to the reference storage.
66
67 4) The memory usage of the preprocessor is monitored. If it exceeds 2 GB it is terminated.
68
69 5) New function AliPreprocessor::ProcessDCS(). If you do not need to have DCS data in all cases, you can skip the processing by implemting this function and returning kFALSE under certain conditions. E.g. if there is a certain run type.
70 If you always need DCS data (like before), you do not need to implement it.
71
72 6) The run type has been added to the monitoring page
73
74 Revision 1.33  2007/04/03 13:56:01  acolla
75 Grid Storage at the end of preprocessing. Added virtual method to disable DCS query according to the
76 run type.
77
78 Revision 1.32  2007/02/28 10:41:56  acolla
79 Run type field added in SHUTTLE framework. Run type is read from "run type" logbook and retrieved by
80 AliPreprocessor::GetRunType() function.
81 Added some ldap definition files.
82
83 Revision 1.30  2007/02/13 11:23:21  acolla
84 Moved getters and setters of Shuttle's main OCDB/Reference, local
85 OCDB/Reference, temp and log folders to AliShuttleInterface
86
87 Revision 1.27  2007/01/30 17:52:42  jgrosseo
88 adding monalisa monitoring
89
90 Revision 1.26  2007/01/23 19:20:03  acolla
91 Removed old ldif files, added TOF, MCH ldif files. Added some options in
92 AliShuttleConfig::Print. Added in Ali Shuttle: SetShuttleTempDir and
93 SetShuttleLogDir
94
95 Revision 1.25  2007/01/15 19:13:52  acolla
96 Moved some AliInfo to AliDebug in SendMail function
97
98 Revision 1.21  2006/12/07 08:51:26  jgrosseo
99 update (alberto):
100 table, db names in ldap configuration
101 added GRP preprocessor
102 DCS data can also be retrieved by data point
103
104 Revision 1.20  2006/11/16 16:16:48  jgrosseo
105 introducing strict run ordering flag
106 removed giving preprocessor name to preprocessor, they have to know their name themselves ;-)
107
108 Revision 1.19  2006/11/06 14:23:04  jgrosseo
109 major update (Alberto)
110 o) reading of run parameters from the logbook
111 o) online offline naming conversion
112 o) standalone DCSclient package
113
114 Revision 1.18  2006/10/20 15:22:59  jgrosseo
115 o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
116 o) Merging Collect, CollectAll, CollectNew function
117 o) Removing implementation of empty copy constructors (declaration still there!)
118
119 Revision 1.17  2006/10/05 16:20:55  jgrosseo
120 adapting to new CDB classes
121
122 Revision 1.16  2006/10/05 15:46:26  jgrosseo
123 applying to the new interface
124
125 Revision 1.15  2006/10/02 16:38:39  jgrosseo
126 update (alberto):
127 fixed memory leaks
128 storing of objects that failed to be stored to the grid before
129 interfacing of shuttle status table in daq system
130
131 Revision 1.14  2006/08/29 09:16:05  jgrosseo
132 small update
133
134 Revision 1.13  2006/08/15 10:50:00  jgrosseo
135 effc++ corrections (alberto)
136
137 Revision 1.12  2006/08/08 14:19:29  jgrosseo
138 Update to shuttle classes (Alberto)
139
140 - Possibility to set the full object's path in the Preprocessor's and
141 Shuttle's  Store functions
142 - Possibility to extend the object's run validity in the same classes
143 ("startValidity" and "validityInfinite" parameters)
144 - Implementation of the StoreReferenceData function to store reference
145 data in a dedicated CDB storage.
146
147 Revision 1.11  2006/07/21 07:37:20  jgrosseo
148 last run is stored after each run
149
150 Revision 1.10  2006/07/20 09:54:40  jgrosseo
151 introducing status management: The processing per subdetector is divided into several steps,
152 after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
153 can keep track of the number of failures and skips further processing after a certain threshold is
154 exceeded. These thresholds can be configured in LDAP.
155
156 Revision 1.9  2006/07/19 10:09:55  jgrosseo
157 new configuration, accesst to DAQ FES (Alberto)
158
159 Revision 1.8  2006/07/11 12:44:36  jgrosseo
160 adding parameters for extended validity range of data produced by preprocessor
161
162 Revision 1.7  2006/07/10 14:37:09  jgrosseo
163 small fix + todo comment
164
165 Revision 1.6  2006/07/10 13:01:41  jgrosseo
166 enhanced storing of last sucessfully processed run (alberto)
167
168 Revision 1.5  2006/07/04 14:59:57  jgrosseo
169 revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
170
171 Revision 1.4  2006/06/12 09:11:16  jgrosseo
172 coding conventions (Alberto)
173
174 Revision 1.3  2006/06/06 14:26:40  jgrosseo
175 o) removed files that were moved to STEER
176 o) shuttle updated to follow the new interface (Alberto)
177
178 Revision 1.2  2006/03/07 07:52:34  hristov
179 New version (B.Yordanov)
180
181 Revision 1.6  2005/11/19 17:19:14  byordano
182 RetrieveDATEEntries and RetrieveConditionsData added
183
184 Revision 1.5  2005/11/19 11:09:27  byordano
185 AliShuttle declaration added
186
187 Revision 1.4  2005/11/17 17:47:34  byordano
188 TList changed to TObjArray
189
190 Revision 1.3  2005/11/17 14:43:23  byordano
191 import to local CVS
192
193 Revision 1.1.1.1  2005/10/28 07:33:58  hristov
194 Initial import as subdirectory in AliRoot
195
196 Revision 1.2  2005/09/13 08:41:15  byordano
197 default startTime endTime added
198
199 Revision 1.4  2005/08/30 09:13:02  byordano
200 some docs added
201
202 Revision 1.3  2005/08/29 21:15:47  byordano
203 some docs added
204
205 */
206
207 //
208 // This class is the main manager for AliShuttle.
209 // It organizes the data retrieval from DCS and calls the
210 // interface methods of AliPreprocessor.
211 // For every detector in AliShuttleConfig (see AliShuttleConfig),
212 // data for its set of aliases is retrieved. If there is a registered
213 // AliPreprocessor for this detector then it will be used
214 // according to the schema (see AliPreprocessor).
215 // If there isn't a registered AliPreprocessor then the retrieved
216 // data is stored automatically to the underlying AliCDBStorage.
217 // The alias name is used for detSpec.
218 //
219
220 #include "AliShuttle.h"
221
222 #include "AliCDBManager.h"
223 #include "AliCDBStorage.h"
224 #include "AliCDBId.h"
225 #include "AliCDBRunRange.h"
226 #include "AliCDBPath.h"
227 #include "AliCDBEntry.h"
228 #include "AliShuttleConfig.h"
229 #include "DCSClient/AliDCSClient.h"
230 #include "AliLog.h"
231 #include "AliPreprocessor.h"
232 #include "AliShuttleStatus.h"
233 #include "AliShuttleLogbookEntry.h"
234
235 #include <TSystem.h>
236 #include <TObject.h>
237 #include <TString.h>
238 #include <TTimeStamp.h>
239 #include <TObjString.h>
240 #include <TSQLServer.h>
241 #include <TSQLResult.h>
242 #include <TSQLRow.h>
243 #include <TMutex.h>
244 #include <TSystemDirectory.h>
245 #include <TSystemFile.h>
246 #include <TFileMerger.h>
247 #include <TGrid.h>
248 #include <TGridResult.h>
249
250 #include <TMonaLisaWriter.h>
251
252 #include <fstream>
253
254 #include <sys/types.h>
255 #include <sys/wait.h>
256
257 ClassImp(AliShuttle)
258
259 //______________________________________________________________________________________________
260 AliShuttle::AliShuttle(const AliShuttleConfig* config,
261                 UInt_t timeout, Int_t retries):
262 fConfig(config),
263 fTimeout(timeout), fRetries(retries),
264 fPreprocessorMap(),
265 fLogbookEntry(0),
266 fCurrentDetector(),
267 fStatusEntry(0),
268 fMonitoringMutex(0),
269 fLastActionTime(0),
270 fLastAction(),
271 fMonaLisa(0),
272 fTestMode(kNone),
273 fReadTestMode(kFALSE),
274 fOutputRedirected(kFALSE)
275 {
276         //
277         // config: AliShuttleConfig used
278         // timeout: timeout used for AliDCSClient connection
279         // retries: the number of retries in case of connection error.
280         //
281
282         if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
283         for(int iSys=0;iSys<4;iSys++) {
284                 fServer[iSys]=0;
285                 if (iSys < 3)
286                         fFXSlist[iSys].SetOwner(kTRUE);
287         }
288         fPreprocessorMap.SetOwner(kTRUE);
289
290         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
291                 fFirstUnprocessed[iDet] = kFALSE;
292
293         fMonitoringMutex = new TMutex();
294 }
295
296 //______________________________________________________________________________________________
297 AliShuttle::~AliShuttle()
298 {
299         //
300         // destructor
301         //
302
303         fPreprocessorMap.DeleteAll();
304         for(int iSys=0;iSys<4;iSys++)
305                 if(fServer[iSys]) {
306                         fServer[iSys]->Close();
307                         delete fServer[iSys];
308                         fServer[iSys] = 0;
309                 }
310
311         if (fStatusEntry){
312                 delete fStatusEntry;
313                 fStatusEntry = 0;
314         }
315         
316         if (fMonitoringMutex) 
317         {
318                 delete fMonitoringMutex;
319                 fMonitoringMutex = 0;
320         }
321 }
322
323 //______________________________________________________________________________________________
324 void AliShuttle::RegisterPreprocessor(AliPreprocessor* preprocessor)
325 {
326         //
327         // Registers new AliPreprocessor.
328         // It uses GetName() for indentificator of the pre processor.
329         // The pre processor is registered it there isn't any other
330         // with the same identificator (GetName()).
331         //
332
333         const char* detName = preprocessor->GetName();
334         if(GetDetPos(detName) < 0)
335                 AliFatal(Form("********** !!!!! Invalid detector name: %s !!!!! **********", detName));
336
337         if (fPreprocessorMap.GetValue(detName)) {
338                 AliWarning(Form("AliPreprocessor %s is already registered!", detName));
339                 return;
340         }
341
342         fPreprocessorMap.Add(new TObjString(detName), preprocessor);
343 }
344 //______________________________________________________________________________________________
345 Bool_t AliShuttle::Store(const AliCDBPath& path, TObject* object,
346                 AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite)
347 {
348         // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for
349         // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored
350         // using this function. Use StoreReferenceData instead!
351         // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
352         // finishes the data are transferred to the main storage (Grid).
353
354         return StoreLocally(fgkLocalCDB, path, object, metaData, validityStart, validityInfinite);
355 }
356
357 //______________________________________________________________________________________________
358 Bool_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData)
359 {
360         // Stores a CDB object in the storage for reference data. This objects will not be available during
361         // offline reconstrunction. Use this function for reference data only!
362         // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
363         // finishes the data are transferred to the main storage (Grid).
364
365         return StoreLocally(fgkLocalRefStorage, path, object, metaData);
366 }
367
368 //______________________________________________________________________________________________
369 Bool_t AliShuttle::StoreLocally(const TString& localUri,
370                         const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
371                         Int_t validityStart, Bool_t validityInfinite)
372 {
373         // Store object temporarily in local storage. Parameters are passed by Store and StoreReferenceData functions.
374         // when the preprocessor finishes the data are transferred to the main storage (Grid).
375         // The parameters are:
376         //   1) Uri of the backup storage (Local)
377         //   2) the object's path.
378         //   3) the object to be stored
379         //   4) the metaData to be associated with the object
380         //   5) the validity start run number w.r.t. the current run,
381         //      if the data is valid only for this run leave the default 0
382         //   6) specifies if the calibration data is valid for infinity (this means until updated),
383         //      typical for calibration runs, the default is kFALSE
384         //
385         // returns 0 if fail, 1 otherwise
386
387         if (fTestMode & kErrorStorage)
388         {
389                 Log(fCurrentDetector, "StoreLocally - In TESTMODE - Simulating error while storing locally");
390                 return kFALSE;
391         }
392         
393         const char* cdbType = (localUri == fgkLocalCDB) ? "CDB" : "Reference";
394
395         Int_t firstRun = GetCurrentRun() - validityStart;
396         if(firstRun < 0) {
397                 AliWarning("First valid run happens to be less than 0! Setting it to 0.");
398                 firstRun=0;
399         }
400
401         Int_t lastRun = -1;
402         if(validityInfinite) {
403                 lastRun = AliCDBRunRange::Infinity();
404         } else {
405                 lastRun = GetCurrentRun();
406         }
407
408         // Version is set to current run, it will be used later to transfer data to Grid
409         AliCDBId id(path, firstRun, lastRun, GetCurrentRun(), -1);
410
411         if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){
412                 TObjString runUsed = Form("%d", GetCurrentRun());
413                 metaData->SetProperty("RunUsed(TObjString)", runUsed.Clone());
414         }
415
416         Bool_t result = kFALSE;
417
418         if (!(AliCDBManager::Instance()->GetStorage(localUri))) {
419                 Log("SHUTTLE", Form("StoreLocally - Cannot activate local %s storage", cdbType));
420         } else {
421                 result = AliCDBManager::Instance()->GetStorage(localUri)
422                                         ->Put(object, id, metaData);
423         }
424
425         if(!result) {
426
427                 Log(fCurrentDetector, Form("StoreLocally - Can't store object <%s>!", id.ToString().Data()));
428         }
429
430         return result;
431 }
432
433 //______________________________________________________________________________________________
434 Bool_t AliShuttle::StoreOCDB()
435 {
436         //
437         // Called when preprocessor ends successfully or when previous storage attempt failed (kStoreError status)
438         // Calls underlying StoreOCDB(const char*) function twice, for OCDB and Reference storage.
439         // Then calls StoreRefFilesToGrid to store reference files. 
440         //
441         
442         if (fTestMode & kErrorGrid)
443         {
444                 Log("SHUTTLE", "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
445                 Log(fCurrentDetector, "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
446                 return kFALSE;
447         }
448         
449         Log("SHUTTLE","Storing OCDB data ...");
450         Bool_t resultCDB = StoreOCDB(fgkMainCDB);
451
452         Log("SHUTTLE","Storing reference data ...");
453         Bool_t resultRef = StoreOCDB(fgkMainRefStorage);
454         
455         Log("SHUTTLE","Storing reference files ...");
456         Bool_t resultRefFiles = StoreRefFilesToGrid();
457         
458         return resultCDB && resultRef && resultRefFiles;
459 }
460
461 //______________________________________________________________________________________________
462 Bool_t AliShuttle::StoreOCDB(const TString& gridURI)
463 {
464         //
465         // Called by StoreOCDB(), performs actual storage to the main OCDB and reference storages (Grid)
466         //
467
468         TObjArray* gridIds=0;
469
470         Bool_t result = kTRUE;
471
472         const char* type = 0;
473         TString localURI;
474         if(gridURI == fgkMainCDB) {
475                 type = "OCDB";
476                 localURI = fgkLocalCDB;
477         } else if(gridURI == fgkMainRefStorage) {
478                 type = "reference";
479                 localURI = fgkLocalRefStorage;
480         } else {
481                 AliError(Form("Invalid storage URI: %s", gridURI.Data()));
482                 return kFALSE;
483         }
484
485         AliCDBManager* man = AliCDBManager::Instance();
486
487         AliCDBStorage *gridSto = man->GetStorage(gridURI);
488         if(!gridSto) {
489                 Log("SHUTTLE",
490                         Form("StoreOCDB - cannot activate main %s storage", type));
491                 return kFALSE;
492         }
493
494         gridIds = gridSto->GetQueryCDBList();
495
496         // get objects previously stored in local CDB
497         AliCDBStorage *localSto = man->GetStorage(localURI);
498         if(!localSto) {
499                 Log("SHUTTLE",
500                         Form("StoreOCDB - cannot activate local %s storage", type));
501                 return kFALSE;
502         }
503         AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*");
504         // Local objects were stored with current run as Grid version!
505         TList* localEntries = localSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun());
506         localEntries->SetOwner(1);
507
508         // loop on local stored objects
509         TIter localIter(localEntries);
510         AliCDBEntry *aLocEntry = 0;
511         while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){
512                 aLocEntry->SetOwner(1);
513                 AliCDBId aLocId = aLocEntry->GetId();
514                 aLocEntry->SetVersion(-1);
515                 aLocEntry->SetSubVersion(-1);
516
517                 // If local object is valid up to infinity we store it only if it is
518                 // the first unprocessed run!
519                 if (aLocId.GetLastRun() == AliCDBRunRange::Infinity() &&
520                         !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
521                 {
522                         Log("SHUTTLE", Form("StoreOCDB - %s: object %s has validity infinite but "
523                                                 "there are previous unprocessed runs!",
524                                                 fCurrentDetector.Data(), aLocId.GetPath().Data()));
525                         continue;
526                 }
527
528                 // loop on Grid valid Id's
529                 Bool_t store = kTRUE;
530                 TIter gridIter(gridIds);
531                 AliCDBId* aGridId = 0;
532                 while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
533                         if(aGridId->GetPath() != aLocId.GetPath()) continue;
534                         // skip all objects valid up to infinity
535                         if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
536                         // if we get here, it means there's already some more recent object stored on Grid!
537                         store = kFALSE;
538                         break;
539                 }
540
541                 // If we get here, the file can be stored!
542                 Bool_t storeOk = gridSto->Put(aLocEntry);
543                 if(!store || storeOk){
544
545                         if (!store)
546                         {
547                                 Log(fCurrentDetector.Data(),
548                                         Form("StoreOCDB - A more recent object already exists in %s storage: <%s>",
549                                                 type, aGridId->ToString().Data()));
550                         } else {
551                                 Log("SHUTTLE",
552                                         Form("StoreOCDB - Object <%s> successfully put into %s storage",
553                                                 aLocId.ToString().Data(), type));
554                                 Log(fCurrentDetector.Data(),
555                                         Form("StoreOCDB - Object <%s> successfully put into %s storage",
556                                                 aLocId.ToString().Data(), type));
557                         }
558
559                         // removing local filename...
560                         TString filename;
561                         localSto->IdToFilename(aLocId, filename);
562                         AliInfo(Form("Removing local file %s", filename.Data()));
563                         RemoveFile(filename.Data());
564                         continue;
565                 } else  {
566                         Log("SHUTTLE",
567                                 Form("StoreOCDB - Grid %s storage of object <%s> failed",
568                                         type, aLocId.ToString().Data()));
569                         Log(fCurrentDetector.Data(),
570                                 Form("StoreOCDB - Grid %s storage of object <%s> failed",
571                                         type, aLocId.ToString().Data()));
572                         result = kFALSE;
573                 }
574         }
575         localEntries->Clear();
576
577         return result;
578 }
579
580 //______________________________________________________________________________________________
581 Bool_t AliShuttle::CleanReferenceStorage(const char* detector)
582 {
583         // clears the directory used to store reference files of a given subdetector
584   
585         AliCDBManager* man = AliCDBManager::Instance();
586         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
587         TString localBaseFolder = sto->GetBaseFolder();
588
589         TString targetDir = GetRefFilePrefix(localBaseFolder.Data(), detector);
590         
591         Log("SHUTTLE", Form("Cleaning %s", targetDir.Data()));
592
593         TString begin;
594         begin.Form("%d_", GetCurrentRun());
595         
596         TSystemDirectory* baseDir = new TSystemDirectory("/", targetDir);
597         if (!baseDir)
598                 return kTRUE;
599                 
600         TList* dirList = baseDir->GetListOfFiles();
601         delete baseDir;
602         
603         if (!dirList) return kTRUE;
604                         
605         if (dirList->GetEntries() < 3) 
606         {
607                 delete dirList;
608                 return kTRUE;
609         }
610                                 
611         Int_t nDirs = 0, nDel = 0;
612         TIter dirIter(dirList);
613         TSystemFile* entry = 0;
614
615         Bool_t success = kTRUE;
616         
617         while ((entry = dynamic_cast<TSystemFile*> (dirIter.Next())))
618         {                                       
619                 if (entry->IsDirectory())
620                         continue;
621                 
622                 TString fileName(entry->GetName());
623                 if (!fileName.BeginsWith(begin))
624                         continue;
625                         
626                 nDirs++;
627                                                 
628                 // delete file
629                 Int_t result = gSystem->Unlink(fileName.Data());
630                 
631                 if (result)
632                 {
633                         Log("SHUTTLE", Form("Could not delete file %s!", fileName.Data()));
634                         success = kFALSE;
635                 } else {
636                         nDel++;
637                 }
638         }
639
640         if(nDirs > 0)
641                 Log("SHUTTLE", Form("CleanReferenceStorage - %d (over %d) reference files in folder %s were deleted.", 
642                         nDel, nDirs, targetDir.Data()));
643
644                 
645         delete dirList;
646         return success;
647
648
649
650
651
652
653   Int_t result = gSystem->GetPathInfo(targetDir, 0, (Long64_t*) 0, 0, 0);
654   if (result == 0)
655   {
656     // delete directory
657     result = gSystem->Exec(Form("rm -r %s", targetDir.Data()));
658     if (result != 0)
659     {  
660       Log("SHUTTLE", Form("StoreReferenceFile - Could not clear directory %s", targetDir.Data()));
661       return kFALSE;
662     }
663   }
664
665   result = gSystem->mkdir(targetDir, kTRUE);
666   if (result != 0)
667   {
668     Log("SHUTTLE", Form("StoreReferenceFile - Error creating base directory %s", targetDir.Data()));
669     return kFALSE;
670   }
671         
672   return kTRUE;
673 }
674
675 //______________________________________________________________________________________________
676 Bool_t AliShuttle::StoreReferenceFile(const char* detector, const char* localFile, const char* gridFileName)
677 {
678         //
679         // Stores reference file directly (without opening it). This function stores the file locally.
680         //
681         // The file is stored under the following location: 
682         // <base folder of local reference storage>/<DET>/<RUN#>_<gridFileName>
683         // where <gridFileName> is the second parameter given to the function
684         // 
685         
686         if (fTestMode & kErrorStorage)
687         {
688                 Log(fCurrentDetector, "StoreReferenceFile - In TESTMODE - Simulating error while storing locally");
689                 return kFALSE;
690         }
691         
692         AliCDBManager* man = AliCDBManager::Instance();
693         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
694         
695         TString localBaseFolder = sto->GetBaseFolder();
696         
697         TString targetDir = GetRefFilePrefix(localBaseFolder.Data(), detector); 
698         
699         //try to open folder, if does not exist
700         void* dir = gSystem->OpenDirectory(targetDir.Data());
701         if (dir == NULL) {
702                 if (gSystem->mkdir(targetDir.Data(), kTRUE)) {
703                         Log("SHUTTLE", Form("Can't open directory <%s>", targetDir.Data()));
704                         return kFALSE;
705                 }
706
707         } else {
708                 gSystem->FreeDirectory(dir);
709         }
710
711         TString target;
712         target.Form("%s/%d_%s", targetDir.Data(), GetCurrentRun(), gridFileName);
713         
714         Int_t result = gSystem->GetPathInfo(localFile, 0, (Long64_t*) 0, 0, 0);
715         if (result)
716         {
717                 Log("SHUTTLE", Form("StoreReferenceFile - %s does not exist", localFile));
718                 return kFALSE;
719         }
720
721         result = gSystem->CopyFile(localFile, target);
722
723         if (result == 0)
724         {
725                 Log("SHUTTLE", Form("StoreReferenceFile - File %s stored locally to %s", localFile, target.Data()));
726                 return kTRUE;
727         }
728         else
729         {
730                 Log("SHUTTLE", Form("StoreReferenceFile - Could not store file %s to %s!. Error code = %d", 
731                                 localFile, target.Data(), result));
732                 return kFALSE;
733         }       
734 }
735
736 //______________________________________________________________________________________________
737 Bool_t AliShuttle::StoreRefFilesToGrid()
738 {
739         //
740         // Transfers the reference file to the Grid.
741         //
742         // The files are stored under the following location: 
743         // <base folder of reference storage>/<DET>/<RUN#>_<gridFileName>
744         //
745         
746         AliCDBManager* man = AliCDBManager::Instance();
747         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
748         if (!sto)
749                 return kFALSE;
750         TString localBaseFolder = sto->GetBaseFolder();
751                 
752         TString dir = GetRefFilePrefix(localBaseFolder.Data(), fCurrentDetector.Data());
753                 
754         AliCDBStorage* gridSto = man->GetStorage(fgkMainRefStorage);
755         if (!gridSto)
756                 return kFALSE;
757         
758         TString gridBaseFolder = gridSto->GetBaseFolder();
759
760         TString alienDir = GetRefFilePrefix(gridBaseFolder.Data(), fCurrentDetector.Data());
761         
762         TString begin;
763         begin.Form("%d_", GetCurrentRun());
764         
765         TSystemDirectory* baseDir = new TSystemDirectory("/", dir);
766         if (!baseDir)
767                 return kTRUE;
768                 
769         TList* dirList = baseDir->GetListOfFiles();
770         delete baseDir;
771         
772         if (!dirList) return kTRUE;
773                 
774         if (dirList->GetEntries() < 3) 
775         {
776                 delete dirList;
777                 return kTRUE;
778         }
779                         
780         if (!gGrid)
781         { 
782                 Log("SHUTTLE", "Connection to Grid failed: Cannot continue!");
783                 delete dirList;
784                 return kFALSE;
785         }
786         
787         Int_t nDirs = 0, nTransfer = 0;
788         TIter dirIter(dirList);
789         TSystemFile* entry = 0;
790
791         Bool_t success = kTRUE;
792         Bool_t first = kTRUE;
793         
794         while ((entry = dynamic_cast<TSystemFile*> (dirIter.Next())))
795         {                       
796                 if (entry->IsDirectory())
797                         continue;
798                         
799                 TString fileName(entry->GetName());
800                 if (!fileName.BeginsWith(begin))
801                         continue;
802                         
803                 nDirs++;
804                         
805                 if (first)
806                 {
807                         first = kFALSE;
808                         // check that DET folder exists, otherwise create it
809                         TGridResult* result = gGrid->Ls(alienDir.Data(), "a");
810                         
811                         if (!result)
812                         {
813                                 delete dirList;
814                                 return kFALSE;
815                         }
816                         
817                         if (!result->GetFileName(1)) // TODO: It looks like element 0 is always 0!!
818                         {
819                                 if (!gGrid->Mkdir(alienDir.Data(),"",0))
820                                 {
821                                         Log("SHUTTLE", Form("StoreRefFilesToGrid - Cannot create directory %s",
822                                                         alienDir.Data()));
823                                         delete dirList;
824                                         return kFALSE;
825                                 } else {
826                                         Log("SHUTTLE",Form("Folder %s created", alienDir.Data()));
827                                 }
828                                 
829                         } else {
830                                         Log("SHUTTLE",Form("Folder %s found", alienDir.Data()));
831                         }
832                 }
833                         
834                 TString fullLocalPath;
835                 fullLocalPath.Form("%s/%s", dir.Data(), fileName.Data());
836                 
837                 TString fullGridPath;
838                 fullGridPath.Form("alien://%s/%s", alienDir.Data(), fileName.Data());
839
840                 TFileMerger fileMerger;
841                 Bool_t result = fileMerger.Cp(fullLocalPath, fullGridPath);
842                 
843                 if (result)
844                 {
845                         Log("SHUTTLE", Form("StoreRefFilesToGrid - Copying local file %s to %s succeeded!", fullLocalPath.Data(), fullGridPath.Data()));
846                         RemoveFile(fullLocalPath);
847                         nTransfer++;
848                 }
849                 else
850                 {
851                         Log("SHUTTLE", Form("StoreRefFilesToGrid - Copying local file %s to %s FAILED!", fullLocalPath.Data(), fullGridPath.Data()));
852                         success = kFALSE;
853                 }
854         }
855
856         Log("SHUTTLE", Form("StoreRefFilesToGrid - %d (over %d) reference files in folder %s copied to Grid.", nTransfer, nDirs, dir.Data()));
857
858                 
859         delete dirList;
860         return success;
861 }
862
863 //______________________________________________________________________________________________
864 const char* AliShuttle::GetRefFilePrefix(const char* base, const char* detector)
865 {
866         //
867         // Get folder name of reference files 
868         //
869
870         TString offDetStr(GetOfflineDetName(detector));
871         TString dir;
872         if (offDetStr == "ITS" || offDetStr == "MUON" || offDetStr == "PHOS")
873         {
874                 dir.Form("%s/%s/%s", base, offDetStr.Data(), detector);
875         } else {
876                 dir.Form("%s/%s", base, offDetStr.Data());
877         }
878         
879         return dir.Data();
880         
881
882 }
883 //______________________________________________________________________________________________
884 void AliShuttle::CleanLocalStorage(const TString& uri)
885 {
886         //
887         // Called in case the preprocessor is declared failed. Remove remaining objects from the local storages.
888         //
889
890         const char* type = 0;
891         if(uri == fgkLocalCDB) {
892                 type = "OCDB";
893         } else if(uri == fgkLocalRefStorage) {
894                 type = "Reference";
895         } else {
896                 AliError(Form("Invalid storage URI: %s", uri.Data()));
897                 return;
898         }
899
900         AliCDBManager* man = AliCDBManager::Instance();
901
902         // open local storage
903         AliCDBStorage *localSto = man->GetStorage(uri);
904         if(!localSto) {
905                 Log("SHUTTLE",
906                         Form("CleanLocalStorage - cannot activate local %s storage", type));
907                 return;
908         }
909
910         TString filename(Form("%s/%s/*/Run*_v%d_s*.root",
911                 localSto->GetBaseFolder().Data(), GetOfflineDetName(fCurrentDetector.Data()), GetCurrentRun()));
912
913         AliInfo(Form("filename = %s", filename.Data()));
914
915         AliInfo(Form("Removing remaining local files from run %d and detector %s ...",
916                 GetCurrentRun(), fCurrentDetector.Data()));
917
918         RemoveFile(filename.Data());
919
920 }
921
922 //______________________________________________________________________________________________
923 void AliShuttle::RemoveFile(const char* filename)
924 {
925         //
926         // removes local file
927         //
928
929         TString command(Form("rm -f %s", filename));
930
931         Int_t result = gSystem->Exec(command.Data());
932         if(result != 0)
933         {
934                 Log("SHUTTLE", Form("RemoveFile - %s: Cannot remove file %s!",
935                         fCurrentDetector.Data(), filename));
936         }
937 }
938
939 //______________________________________________________________________________________________
940 AliShuttleStatus* AliShuttle::ReadShuttleStatus()
941 {
942         //
943         // Reads the AliShuttleStatus from the CDB
944         //
945
946         if (fStatusEntry){
947                 delete fStatusEntry;
948                 fStatusEntry = 0;
949         }
950
951         fStatusEntry = AliCDBManager::Instance()->GetStorage(GetLocalCDB())
952                 ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
953
954         if (!fStatusEntry) return 0;
955         fStatusEntry->SetOwner(1);
956
957         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
958         if (!status) {
959                 AliError("Invalid object stored to CDB!");
960                 return 0;
961         }
962
963         return status;
964 }
965
966 //______________________________________________________________________________________________
967 Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status)
968 {
969         //
970         // writes the status for one subdetector
971         //
972
973         if (fStatusEntry){
974                 delete fStatusEntry;
975                 fStatusEntry = 0;
976         }
977
978         Int_t run = GetCurrentRun();
979
980         AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
981
982         fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
983         fStatusEntry->SetOwner(1);
984
985         UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
986
987         if (!result) {
988                 Log("SHUTTLE", Form("WriteShuttleStatus - Failed for %s, run %d",
989                                                 fCurrentDetector.Data(), run));
990                 return kFALSE;
991         }
992         
993         SendMLInfo();
994
995         return kTRUE;
996 }
997
998 //______________________________________________________________________________________________
999 void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount)
1000 {
1001         //
1002         // changes the AliShuttleStatus for the given detector and run to the given status
1003         //
1004
1005         if (!fStatusEntry){
1006                 AliError("UNEXPECTED: fStatusEntry empty");
1007                 return;
1008         }
1009
1010         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1011
1012         if (!status){
1013                 Log("SHUTTLE", "UNEXPECTED: status could not be read from current CDB entry");
1014                 return;
1015         }
1016
1017         TString actionStr = Form("UpdateShuttleStatus - %s: Changing state from %s to %s",
1018                                 fCurrentDetector.Data(),
1019                                 status->GetStatusName(),
1020                                 status->GetStatusName(newStatus));
1021         Log("SHUTTLE", actionStr);
1022         SetLastAction(actionStr);
1023
1024         status->SetStatus(newStatus);
1025         if (increaseCount) status->IncreaseCount();
1026
1027         AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
1028
1029         SendMLInfo();
1030 }
1031
1032 //______________________________________________________________________________________________
1033 void AliShuttle::SendMLInfo()
1034 {
1035         //
1036         // sends ML information about the current status of the current detector being processed
1037         //
1038         
1039         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1040         
1041         if (!status){
1042                 Log("SHUTTLE", "SendMLInfo - UNEXPECTED: status could not be read from current CDB entry");
1043                 return;
1044         }
1045         
1046         TMonaLisaText  mlStatus(Form("%s_status", fCurrentDetector.Data()), status->GetStatusName());
1047         TMonaLisaValue mlRetryCount(Form("%s_count", fCurrentDetector.Data()), status->GetCount());
1048
1049         TList mlList;
1050         mlList.Add(&mlStatus);
1051         mlList.Add(&mlRetryCount);
1052
1053         fMonaLisa->SendParameters(&mlList);
1054 }
1055
1056 //______________________________________________________________________________________________
1057 Bool_t AliShuttle::ContinueProcessing()
1058 {
1059         // this function reads the AliShuttleStatus information from CDB and
1060         // checks if the processing should be continued
1061         // if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
1062
1063         if (!fConfig->HostProcessDetector(fCurrentDetector)) return kFALSE;
1064
1065         AliPreprocessor* aPreprocessor =
1066                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
1067         if (!aPreprocessor)
1068         {
1069                 AliInfo(Form("%s: no preprocessor registered", fCurrentDetector.Data()));
1070                 return kFALSE;
1071         }
1072
1073         AliShuttleLogbookEntry::Status entryStatus =
1074                 fLogbookEntry->GetDetectorStatus(fCurrentDetector);
1075
1076         if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
1077                 AliInfo(Form("ContinueProcessing - %s is %s",
1078                                 fCurrentDetector.Data(),
1079                                 fLogbookEntry->GetDetectorStatusName(entryStatus)));
1080                 return kFALSE;
1081         }
1082
1083         // if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state
1084
1085         // check if current run is first unprocessed run for current detector
1086         if (fConfig->StrictRunOrder(fCurrentDetector) &&
1087                 !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
1088         {
1089                 if (fTestMode == kNone)
1090                 {
1091                         Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering but this is not the first unprocessed run!"));
1092                         return kFALSE;
1093                 }
1094                 else
1095                 {
1096                         Log("SHUTTLE", Form("ContinueProcessing - In TESTMODE - Although %s requires strict run ordering and this is not the first unprocessed run, the SHUTTLE continues"));
1097                 }
1098         }
1099
1100         AliShuttleStatus* status = ReadShuttleStatus();
1101         if (!status) {
1102                 // first time
1103                 Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
1104                                 fCurrentDetector.Data()));
1105                 status = new AliShuttleStatus(AliShuttleStatus::kStarted);
1106                 return WriteShuttleStatus(status);
1107         }
1108
1109         // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
1110         // If it happens it may mean Logbook updating failed... let's do it now!
1111         if (status->GetStatus() == AliShuttleStatus::kDone ||
1112             status->GetStatus() == AliShuttleStatus::kFailed){
1113                 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
1114                                         fCurrentDetector.Data(),
1115                                         status->GetStatusName(status->GetStatus())));
1116                 UpdateShuttleLogbook(fCurrentDetector.Data(),
1117                                         status->GetStatusName(status->GetStatus()));
1118                 return kFALSE;
1119         }
1120
1121         if (status->GetStatus() == AliShuttleStatus::kStoreError) {
1122                 Log("SHUTTLE",
1123                         Form("ContinueProcessing - %s: Grid storage of one or more objects failed. Trying again now",
1124                                 fCurrentDetector.Data()));
1125                 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
1126                 if (StoreOCDB()){
1127                         Log("SHUTTLE", Form("ContinueProcessing - %s: all objects successfully stored into main storage",
1128                                 fCurrentDetector.Data()));
1129                         UpdateShuttleStatus(AliShuttleStatus::kDone);
1130                         UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE");
1131                 } else {
1132                         Log("SHUTTLE",
1133                                 Form("ContinueProcessing - %s: Grid storage failed again",
1134                                         fCurrentDetector.Data()));
1135                         UpdateShuttleStatus(AliShuttleStatus::kStoreError);
1136                 }
1137                 return kFALSE;
1138         }
1139
1140         // if we get here, there is a restart
1141         Bool_t cont = kFALSE;
1142
1143         // abort conditions
1144         if (status->GetCount() >= fConfig->GetMaxRetries()) {
1145                 Log("SHUTTLE", Form("ContinueProcessing - %s failed %d times in status %s - "
1146                                 "Updating Shuttle Logbook", fCurrentDetector.Data(),
1147                                 status->GetCount(), status->GetStatusName()));
1148                 UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
1149                 UpdateShuttleStatus(AliShuttleStatus::kFailed);
1150
1151                 // there may still be objects in local OCDB and reference storage
1152                 // and FXS databases may be not updated: do it now!
1153                 
1154                 // TODO Currently disabled, we want to keep files in case of failure!
1155                 // CleanLocalStorage(fgkLocalCDB);
1156                 // CleanLocalStorage(fgkLocalRefStorage);
1157                 // UpdateTableFailCase();
1158                 
1159                 // Send mail to detector expert!
1160                 AliInfo(Form("Sending mail to %s expert...", fCurrentDetector.Data()));
1161                 if (!SendMail())
1162                         Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert",
1163                                         fCurrentDetector.Data()));
1164
1165         } else {
1166                 Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. "
1167                                 "Aborted before with %s. Retry number %d.", fCurrentDetector.Data(),
1168                                 status->GetStatusName(), status->GetCount()));
1169                 Bool_t increaseCount = kTRUE;
1170                 if (status->GetStatus() == AliShuttleStatus::kDCSError || status->GetStatus() == AliShuttleStatus::kDCSStarted)
1171                         increaseCount = kFALSE;
1172                 UpdateShuttleStatus(AliShuttleStatus::kStarted, increaseCount);
1173                 cont = kTRUE;
1174         }
1175
1176         return cont;
1177 }
1178
1179 //______________________________________________________________________________________________
1180 Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
1181 {
1182         //
1183         // Makes data retrieval for all detectors in the configuration.
1184         // entry: Shuttle logbook entry, contains run paramenters and status of detectors
1185         // (Unprocessed, Inactive, Failed or Done).
1186         // Returns kFALSE in case of error occured and kTRUE otherwise
1187         //
1188
1189         if (!entry) return kFALSE;
1190
1191         fLogbookEntry = entry;
1192
1193         AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^* \n",
1194                                         GetCurrentRun()));
1195
1196         // create ML instance that monitors this run
1197         fMonaLisa = new TMonaLisaWriter(Form("%d", GetCurrentRun()), "SHUTTLE", "aliendb1.cern.ch");
1198         // disable monitoring of other parameters that come e.g. from TFile
1199         gMonitoringWriter = 0;
1200
1201         // Send the information to ML
1202         TMonaLisaText  mlStatus("SHUTTLE_status", "Processing");
1203         TMonaLisaText  mlRunType("SHUTTLE_runtype", Form("%s (%s)", entry->GetRunType(), entry->GetRunParameter("log")));
1204
1205         TList mlList;
1206         mlList.Add(&mlStatus);
1207         mlList.Add(&mlRunType);
1208
1209         fMonaLisa->SendParameters(&mlList);
1210
1211         if (fLogbookEntry->IsDone())
1212         {
1213                 Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook");
1214                 UpdateShuttleLogbook("shuttle_done");
1215                 fLogbookEntry = 0;
1216                 return kTRUE;
1217         }
1218
1219         // read test mode if flag is set
1220         if (fReadTestMode)
1221         {
1222                 fTestMode = kNone;
1223                 TString logEntry(entry->GetRunParameter("log"));
1224                 //printf("log entry = %s\n", logEntry.Data());
1225                 TString searchStr("Testmode: ");
1226                 Int_t pos = logEntry.Index(searchStr.Data());
1227                 //printf("%d\n", pos);
1228                 if (pos >= 0)
1229                 {
1230                         TSubString subStr = logEntry(pos + searchStr.Length(), logEntry.Length());
1231                         //printf("%s\n", subStr.String().Data());
1232                         TString newStr(subStr.Data());
1233                         TObjArray* token = newStr.Tokenize(' ');
1234                         if (token)
1235                         {
1236                                 //token->Print();
1237                                 TObjString* tmpStr = dynamic_cast<TObjString*> (token->First());
1238                                 if (tmpStr)
1239                                 {
1240                                         Int_t testMode = tmpStr->String().Atoi();
1241                                         if (testMode > 0)
1242                                         {
1243                                                 Log("SHUTTLE", Form("Enabling test mode %d", testMode));
1244                                                 SetTestMode((TestMode) testMode);
1245                                         }
1246                                 }
1247                                 delete token;          
1248                         }
1249                 }
1250         }
1251         
1252         Log("SHUTTLE", Form("The test mode flag is %d", (Int_t) fTestMode));
1253         
1254         fLogbookEntry->Print("all");
1255
1256         // Initialization
1257         Bool_t hasError = kFALSE;
1258
1259         AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
1260         if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
1261         AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
1262         if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
1263
1264         // Loop on detectors in the configuration
1265         TIter iter(fConfig->GetDetectors());
1266         TObjString* aDetector = 0;
1267
1268         while ((aDetector = (TObjString*) iter.Next()))
1269         {
1270                 fCurrentDetector = aDetector->String();
1271
1272                 if (ContinueProcessing() == kFALSE) continue;
1273
1274                 AliInfo(Form("\n\n \t\t\t****** run %d - %s: START  ******",
1275                                                 GetCurrentRun(), aDetector->GetName()));
1276
1277                 for(Int_t iSys=0;iSys<3;iSys++) fFXSCalled[iSys]=kFALSE;
1278
1279                 Log(fCurrentDetector.Data(), "Starting processing");
1280
1281                 Int_t pid = fork();
1282
1283                 if (pid < 0)
1284                 {
1285                         Log("SHUTTLE", "ERROR: Forking failed");
1286                 }
1287                 else if (pid > 0)
1288                 {
1289                         // parent
1290                         AliInfo(Form("In parent process of %d - %s: Starting monitoring",
1291                                                         GetCurrentRun(), aDetector->GetName()));
1292
1293                         Long_t begin = time(0);
1294
1295                         int status; // to be used with waitpid, on purpose an int (not Int_t)!
1296                         while (waitpid(pid, &status, WNOHANG) == 0)
1297                         {
1298                                 Long_t expiredTime = time(0) - begin;
1299
1300                                 if (expiredTime > fConfig->GetPPTimeOut())
1301                                 {
1302                                         TString tmp;
1303                                         tmp.Form("Process of %s time out. Run time: %d seconds. Killing...",
1304                                                                 fCurrentDetector.Data(), expiredTime);
1305                                         Log("SHUTTLE", tmp);
1306                                         Log(fCurrentDetector, tmp);
1307
1308                                         kill(pid, 9);
1309
1310                                         UpdateShuttleStatus(AliShuttleStatus::kPPTimeOut);
1311                                         hasError = kTRUE;
1312
1313                                         gSystem->Sleep(1000);
1314                                 }
1315                                 else
1316                                 {
1317                                         gSystem->Sleep(1000);
1318                                         
1319                                         TString checkStr;
1320                                         checkStr.Form("ps -o vsize --pid %d | tail -n 1", pid);
1321                                         FILE* pipe = gSystem->OpenPipe(checkStr, "r");
1322                                         if (!pipe)
1323                                         {
1324                                                 Log("SHUTTLE", Form("Error: Could not open pipe to %s", checkStr.Data()));
1325                                                 continue;
1326                                         }
1327                                                 
1328                                         char buffer[100];
1329                                         if (!fgets(buffer, 100, pipe))
1330                                         {
1331                                                 Log("SHUTTLE", "Error: ps did not return anything");
1332                                                 gSystem->ClosePipe(pipe);
1333                                                 continue;
1334                                         }
1335                                         gSystem->ClosePipe(pipe);
1336                                         
1337                                         //Log("SHUTTLE", Form("ps returned %s", buffer));
1338                                         
1339                                         Int_t mem = 0;
1340                                         if ((sscanf(buffer, "%d\n", &mem) != 1) || !mem)
1341                                         {
1342                                                 Log("SHUTTLE", "Error: Could not parse output of ps");
1343                                                 continue;
1344                                         }
1345                                         
1346                                         if (expiredTime % 60 == 0)
1347                                                 Log("SHUTTLE", Form("%s: Checking process. Run time: %d seconds - Memory consumption: %d KB",
1348                                                                 fCurrentDetector.Data(), expiredTime, mem));
1349                                         
1350                                         if (mem > fConfig->GetPPMaxMem())
1351                                         {
1352                                                 TString tmp;
1353                                                 tmp.Form("Process exceeds maximum allowed memory (%d KB > %d KB). Killing...",
1354                                                         mem, fConfig->GetPPMaxMem());
1355                                                 Log("SHUTTLE", tmp);
1356                                                 Log(fCurrentDetector, tmp);
1357         
1358                                                 kill(pid, 9);
1359         
1360                                                 UpdateShuttleStatus(AliShuttleStatus::kPPOutOfMemory);
1361                                                 hasError = kTRUE;
1362         
1363                                                 gSystem->Sleep(1000);
1364                                         }
1365                                 }
1366                         }
1367
1368                         AliInfo(Form("In parent process of %d - %s: Client has terminated.",
1369                                                                 GetCurrentRun(), aDetector->GetName()));
1370
1371                         if (WIFEXITED(status))
1372                         {
1373                                 Int_t returnCode = WEXITSTATUS(status);
1374
1375                                 Log("SHUTTLE", Form("%s: the return code is %d", fCurrentDetector.Data(),
1376                                                                                 returnCode));
1377
1378                                 if (returnCode == 0) hasError = kTRUE;
1379                         }
1380                 }
1381                 else if (pid == 0)
1382                 {
1383                         // client
1384                         AliInfo(Form("In client process of %d - %s", GetCurrentRun(), aDetector->GetName()));
1385
1386                         AliInfo("Redirecting output...");
1387
1388                         if ((freopen(GetLogFileName(fCurrentDetector), "a", stdout)) == 0)
1389                         {
1390                                 Log("SHUTTLE", "Could not freopen stdout");
1391                         }
1392                         else
1393                         {
1394                                 fOutputRedirected = kTRUE;
1395                                 if ((dup2(fileno(stdout), fileno(stderr))) < 0)
1396                                         Log("SHUTTLE", "Could not redirect stderr");
1397                                 
1398                         }
1399                         
1400                         Bool_t success = ProcessCurrentDetector();
1401                         if (success) // Preprocessor finished successfully!
1402                         { 
1403                                 // Update time_processed field in FXS DB
1404                                 if (UpdateTable() == kFALSE)
1405                                         Log("SHUTTLE", Form("Process - %s: Could not update FXS databases!"));
1406
1407                                 // Transfer the data from local storage to main storage (Grid)
1408                                 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
1409                                 if (StoreOCDB() == kFALSE)
1410                                 {
1411                                         AliInfo(Form("\n \t\t\t****** run %d - %s: STORAGE ERROR ****** \n\n",
1412                                                         GetCurrentRun(), aDetector->GetName()));
1413                                         UpdateShuttleStatus(AliShuttleStatus::kStoreError);
1414                                         success = kFALSE;
1415                                 } else {
1416                                         AliInfo(Form("\n \t\t\t****** run %d - %s: DONE ****** \n\n",
1417                                                         GetCurrentRun(), aDetector->GetName()));
1418                                         UpdateShuttleStatus(AliShuttleStatus::kDone);
1419                                         UpdateShuttleLogbook(fCurrentDetector, "DONE");
1420                                 }
1421                         }
1422
1423                         for (UInt_t iSys=0; iSys<3; iSys++)
1424                         {
1425                                 if (fFXSCalled[iSys]) fFXSlist[iSys].Clear();
1426                         }
1427
1428                         AliInfo(Form("Client process of %d - %s is exiting now with %d.",
1429                                                         GetCurrentRun(), aDetector->GetName(), success));
1430
1431                         // the client exits here
1432                         gSystem->Exit(success);
1433
1434                         AliError("We should never get here!!!");
1435                 }
1436         }
1437
1438         AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^* \n",
1439                                                         GetCurrentRun()));
1440
1441         //check if shuttle is done for this run, if so update logbook
1442         TObjArray checkEntryArray;
1443         checkEntryArray.SetOwner(1);
1444         TString whereClause = Form("where run=%d", GetCurrentRun());
1445         if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || checkEntryArray.GetEntries() == 0) {
1446                 Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!",
1447                                                 GetCurrentRun()));
1448                 return hasError == kFALSE;
1449         }
1450
1451         AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*>
1452                                                 (checkEntryArray.At(0));
1453
1454         if (checkEntry)
1455         {
1456                 if (checkEntry->IsDone())
1457                 {
1458                         Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook");
1459                         UpdateShuttleLogbook("shuttle_done");
1460                 }
1461                 else
1462                 {
1463                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1464                         {
1465                                 if (checkEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
1466                                 {
1467                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
1468                                                         checkEntry->GetRun(), GetDetName(iDet)));
1469                                         fFirstUnprocessed[iDet] = kFALSE;
1470                                 }
1471                         }
1472                 }
1473         }
1474
1475         // remove ML instance
1476         delete fMonaLisa;
1477         fMonaLisa = 0;
1478
1479         fLogbookEntry = 0;
1480
1481         return hasError == kFALSE;
1482 }
1483
1484 //______________________________________________________________________________________________
1485 Bool_t AliShuttle::ProcessCurrentDetector()
1486 {
1487         //
1488         // Makes data retrieval just for a specific detector (fCurrentDetector).
1489         // Threre should be a configuration for this detector.
1490
1491         AliInfo(Form("Retrieving values for %s, run %d", fCurrentDetector.Data(), GetCurrentRun()));
1492
1493         if (!CleanReferenceStorage(fCurrentDetector.Data()))
1494                 return kFALSE;
1495
1496         TMap* dcsMap = 0;
1497
1498         // call preprocessor
1499         AliPreprocessor* aPreprocessor =
1500                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
1501
1502         aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
1503
1504         Bool_t processDCS = aPreprocessor->ProcessDCS();
1505
1506         if (!processDCS)
1507         {
1508                 Log(fCurrentDetector, "The preprocessor requested to skip the retrieval of DCS values");
1509         }
1510         else if (fTestMode & kSkipDCS)
1511         {
1512                 Log(fCurrentDetector, "In TESTMODE - Skipping DCS processing!");
1513         } 
1514         else if (fTestMode & kErrorDCS)
1515         {
1516                 Log(fCurrentDetector, "In TESTMODE - Simulating DCS error");
1517                 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
1518                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1519                 return kFALSE;
1520         } else {
1521
1522                 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
1523
1524                 TString host(fConfig->GetDCSHost(fCurrentDetector));
1525                 Int_t port = fConfig->GetDCSPort(fCurrentDetector);
1526
1527                 if (fConfig->GetDCSAliases(fCurrentDetector)->GetEntries() > 0)
1528                 {
1529                         dcsMap = GetValueSet(host, port, fConfig->GetDCSAliases(fCurrentDetector), kAlias);
1530                         if (!dcsMap)
1531                         {
1532                                 Log(fCurrentDetector, "ProcessCurrentDetector - Error while retrieving DCS aliases");
1533                                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1534                                 return kFALSE;
1535                         }
1536                 }
1537                 
1538                 if (fConfig->GetDCSDataPoints(fCurrentDetector)->GetEntries() > 0)
1539                 {
1540                         TMap* dcsMap2 = GetValueSet(host, port, fConfig->GetDCSDataPoints(fCurrentDetector), kDP);
1541                         if (!dcsMap2)
1542                         {
1543                                 Log(fCurrentDetector, "ProcessCurrentDetector - Error while retrieving DCS data points");
1544                                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1545                                 if (dcsMap)
1546                                         delete dcsMap;
1547                                 return kFALSE;
1548                         }
1549                         
1550                         if (!dcsMap)
1551                         {
1552                                 dcsMap = dcsMap2;
1553                         }
1554                         else // merge
1555                         {
1556                                 TIter iter(dcsMap2);
1557                                 TObjString* key = 0;
1558                                 while ((key = (TObjString*) iter.Next()))
1559                                         dcsMap->Add(key, dcsMap2->GetValue(key->String()));
1560                                         
1561                                 dcsMap2->SetOwner(kFALSE);
1562                                 delete dcsMap2;
1563                         }
1564                 }
1565                 
1566         }
1567
1568         // still no map?
1569         if (!dcsMap)
1570                 dcsMap = new TMap;
1571         
1572         // DCS Archive DB processing successful. Call Preprocessor!
1573         UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
1574
1575         UInt_t returnValue = aPreprocessor->Process(dcsMap);
1576
1577         if (returnValue > 0) // Preprocessor error!
1578         {
1579                 Log(fCurrentDetector, Form("Preprocessor failed. Process returned %d.", returnValue));
1580                 UpdateShuttleStatus(AliShuttleStatus::kPPError);
1581                 dcsMap->DeleteAll();
1582                 delete dcsMap;
1583                 return kFALSE;
1584         }
1585         
1586         // preprocessor ok!
1587         UpdateShuttleStatus(AliShuttleStatus::kPPDone);
1588         Log(fCurrentDetector, Form("ProcessCurrentDetector - %s preprocessor returned success",
1589                                 fCurrentDetector.Data()));
1590
1591         dcsMap->DeleteAll();
1592         delete dcsMap;
1593
1594         return kTRUE;
1595 }
1596
1597 //______________________________________________________________________________________________
1598 Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
1599                 TObjArray& entries)
1600 {
1601         // Query DAQ's Shuttle logbook and fills detector status object.
1602         // Call QueryRunParameters to query DAQ logbook for run parameters.
1603         //
1604
1605         entries.SetOwner(1);
1606
1607         // check connection, in case connect
1608         if(!Connect(3)) return kFALSE;
1609
1610         TString sqlQuery;
1611         sqlQuery = Form("select * from %s %s order by run", fConfig->GetShuttlelbTable(), whereClause);
1612
1613         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
1614         if (!aResult) {
1615                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
1616                 return kFALSE;
1617         }
1618
1619         AliDebug(2,Form("Query = %s", sqlQuery.Data()));
1620
1621         if(aResult->GetRowCount() == 0) {
1622                 AliInfo("No entries in Shuttle Logbook match request");
1623                 delete aResult;
1624                 return kTRUE;
1625         }
1626
1627         // TODO Check field count!
1628         const UInt_t nCols = 22;
1629         if (aResult->GetFieldCount() != (Int_t) nCols) {
1630                 AliError("Invalid SQL result field number!");
1631                 delete aResult;
1632                 return kFALSE;
1633         }
1634
1635         TSQLRow* aRow;
1636         while ((aRow = aResult->Next())) {
1637                 TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
1638                 Int_t run = runString.Atoi();
1639
1640                 AliShuttleLogbookEntry *entry = QueryRunParameters(run);
1641                 if (!entry)
1642                         continue;
1643
1644                 // loop on detectors
1645                 for(UInt_t ii = 0; ii < nCols; ii++)
1646                         entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii));
1647
1648                 entries.AddLast(entry);
1649                 delete aRow;
1650         }
1651
1652         delete aResult;
1653         return kTRUE;
1654 }
1655
1656 //______________________________________________________________________________________________
1657 AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
1658 {
1659         //
1660         // Retrieve run parameters written in the DAQ logbook and sets them into AliShuttleLogbookEntry object
1661         //
1662
1663         // check connection, in case connect
1664         if (!Connect(3))
1665                 return 0;
1666
1667         TString sqlQuery;
1668         sqlQuery.Form("select * from %s where run=%d", fConfig->GetDAQlbTable(), run);
1669
1670         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
1671         if (!aResult) {
1672                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
1673                 return 0;
1674         }
1675
1676         if (aResult->GetRowCount() == 0) {
1677                 Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run));
1678                 delete aResult;
1679                 return 0;
1680         }
1681
1682         if (aResult->GetRowCount() > 1) {
1683                 AliError(Form("More than one entry in DAQ Logbook for run %d. Skipping", run));
1684                 delete aResult;
1685                 return 0;
1686         }
1687
1688         TSQLRow* aRow = aResult->Next();
1689         if (!aRow)
1690         {
1691                 AliError(Form("Could not retrieve row for run %d. Skipping", run));
1692                 delete aResult;
1693                 return 0;
1694         }
1695
1696         AliShuttleLogbookEntry* entry = new AliShuttleLogbookEntry(run);
1697
1698         for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
1699                 entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
1700
1701         UInt_t startTime = entry->GetStartTime();
1702         UInt_t endTime = entry->GetEndTime();
1703
1704         if (!startTime || !endTime || startTime > endTime) {
1705                 Log("SHUTTLE",
1706                         Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d",
1707                                 run, startTime, endTime));
1708                 delete entry;
1709                 delete aRow;
1710                 delete aResult;
1711                 return 0;
1712         }
1713
1714         delete aRow;
1715         delete aResult;
1716
1717         return entry;
1718 }
1719
1720 //______________________________________________________________________________________________
1721 Bool_t AliShuttle::GetValueSet(const char* host, Int_t port, const char* entry,
1722                                 TObjArray* valueSet, DCSType type)
1723 {
1724         // Retrieve all "entry" data points from the DCS server
1725         // host, port: TSocket connection parameters
1726         // entry: name of the alias or data point
1727         // valueSet: array of retrieved AliDCSValue's
1728         // type: kAlias or kDP
1729
1730         AliDCSClient client(host, port, fTimeout, fRetries);
1731         if (!client.IsConnected())
1732         {
1733                 return kFALSE;
1734         }
1735
1736         Int_t result=0;
1737
1738         if (type == kAlias)
1739         {
1740                 result = client.GetAliasValues(entry,
1741                         GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1742         } else
1743         if (type == kDP)
1744         {
1745                 result = client.GetDPValues(entry,
1746                         GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1747         }
1748
1749         if (result < 0)
1750         {
1751                 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get '%s'! Reason: %s",
1752                         entry, AliDCSClient::GetErrorString(result)));
1753
1754                 if (result == AliDCSClient::fgkServerError)
1755                 {
1756                         Log(fCurrentDetector.Data(), Form("GetValueSet - Server error: %s",
1757                                 client.GetServerError().Data()));
1758                 }
1759
1760                 return kFALSE;
1761         }
1762
1763         return kTRUE;
1764 }
1765
1766 //______________________________________________________________________________________________
1767 TMap* AliShuttle::GetValueSet(const char* host, Int_t port, const TSeqCollection* entries,
1768                               DCSType type)
1769 {
1770         // Retrieve all "entry" data points from the DCS server
1771         // host, port: TSocket connection parameters
1772         // entries: list of name of the alias or data point
1773         // type: kAlias or kDP
1774         // returns TMap of values, 0 when failure
1775
1776         const Int_t kSplit = 100; // maximum number of DPs at a time
1777         
1778         Int_t totalEntries = entries->GetEntries();
1779         
1780         TMap* result = 0;
1781         
1782         for (Int_t index=0; index < totalEntries; index += kSplit)
1783         {
1784                 Int_t endIndex = index + kSplit;
1785         
1786                 AliDCSClient client(host, port, fTimeout, fRetries);
1787                 if (!client.IsConnected())
1788                         return 0;
1789
1790                 TMap* partialResult = 0;
1791
1792                 if (type == kAlias)
1793                 {
1794                         partialResult = client.GetAliasValues(entries, GetCurrentStartTime(), 
1795                                 GetCurrentEndTime(), index, endIndex);
1796                 } 
1797                 else if (type == kDP)
1798                 {
1799                         partialResult = client.GetDPValues(entries, GetCurrentStartTime(), 
1800                                 GetCurrentEndTime(), index, endIndex);
1801                 }
1802
1803                 if (partialResult == 0)
1804                 {
1805                         Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get entries (%d...%d)! Reason: %s",
1806                                 index, endIndex, client.GetServerError().Data()));
1807         
1808                         if (result)
1809                                 delete result;
1810                                 
1811                         return 0;
1812                 }
1813                 
1814                 AliInfo(Form("Retrieved entries %d..%d (total %d); E.g. %s has %d values collected",
1815                                         index, endIndex, totalEntries, entries->At(index)->GetName(), ((TObjArray*)
1816                                         partialResult->GetValue(entries->At(index)->GetName()))->GetEntriesFast()));
1817                 
1818                 if (!result)
1819                 {
1820                         result = partialResult;
1821                 }
1822                 else
1823                 {               
1824                         TIter iter(partialResult);
1825                         TObjString* key = 0;
1826                         while ((key = (TObjString*) iter.Next()))
1827                                 result->Add(key, partialResult->GetValue(key->String()));
1828                                 
1829                         partialResult->SetOwner(kFALSE);
1830                         delete partialResult;
1831                 }
1832         
1833         }
1834
1835         return result;
1836 }
1837 //______________________________________________________________________________________________
1838 const char* AliShuttle::GetFile(Int_t system, const char* detector,
1839                 const char* id, const char* source)
1840 {
1841         // Get calibration file from file exchange servers
1842         // First queris the FXS database for the file name, using the run, detector, id and source info
1843         // then calls RetrieveFile(filename) for actual copy to local disk
1844         // run: current run being processed (given by Logbook entry fLogbookEntry)
1845         // detector: the Preprocessor name
1846         // id: provided as a parameter by the Preprocessor
1847         // source: provided by the Preprocessor through GetFileSources function
1848
1849         // check if test mode should simulate a FXS error
1850         if (fTestMode & kErrorFXSFiles)
1851         {
1852                 Log(detector, Form("GetFile - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
1853                 return 0;
1854         }
1855         
1856         // check connection, in case connect
1857         if (!Connect(system))
1858         {
1859                 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
1860                 return 0;
1861         }
1862
1863         // Query preparation
1864         TString sourceName(source);
1865         Int_t nFields = 3;
1866         TString sqlQueryStart = Form("select filePath,size,fileChecksum from %s where",
1867                                                                 fConfig->GetFXSdbTable(system));
1868         TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1869                                                                 GetCurrentRun(), detector, id);
1870
1871         if (system == kDAQ)
1872         {
1873                 whereClause += Form(" and DAQsource=\"%s\"", source);
1874         }
1875         else if (system == kDCS)
1876         {
1877                 sourceName="none";
1878         }
1879         else if (system == kHLT)
1880         {
1881                 whereClause += Form(" and DDLnumbers=\"%s\"", source);
1882                 nFields = 3;
1883         }
1884
1885         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1886
1887         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1888
1889         // Query execution
1890         TSQLResult* aResult = 0;
1891         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
1892         if (!aResult) {
1893                 Log(detector, Form("GetFileName - Can't execute SQL query to %s database for: id = %s, source = %s",
1894                                 GetSystemName(system), id, sourceName.Data()));
1895                 return 0;
1896         }
1897
1898         if(aResult->GetRowCount() == 0)
1899         {
1900                 Log(detector,
1901                         Form("GetFileName - No entry in %s FXS db for: id = %s, source = %s",
1902                                 GetSystemName(system), id, sourceName.Data()));
1903                 delete aResult;
1904                 return 0;
1905         }
1906
1907         if (aResult->GetRowCount() > 1) {
1908                 Log(detector,
1909                         Form("GetFileName - More than one entry in %s FXS db for: id = %s, source = %s",
1910                                 GetSystemName(system), id, sourceName.Data()));
1911                 delete aResult;
1912                 return 0;
1913         }
1914
1915         if (aResult->GetFieldCount() != nFields) {
1916                 Log(detector,
1917                         Form("GetFileName - Wrong field count in %s FXS db for: id = %s, source = %s",
1918                                 GetSystemName(system), id, sourceName.Data()));
1919                 delete aResult;
1920                 return 0;
1921         }
1922
1923         TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
1924
1925         if (!aRow){
1926                 Log(detector, Form("GetFileName - Empty set result in %s FXS db from query: id = %s, source = %s",
1927                                 GetSystemName(system), id, sourceName.Data()));
1928                 delete aResult;
1929                 return 0;
1930         }
1931
1932         TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
1933         TString fileSize(aRow->GetField(1), aRow->GetFieldLength(1));
1934         TString fileChecksum(aRow->GetField(2), aRow->GetFieldLength(2));
1935
1936         delete aResult;
1937         delete aRow;
1938
1939         AliDebug(2, Form("filePath = %s; size = %s, fileChecksum = %s",
1940                                 filePath.Data(), fileSize.Data(), fileChecksum.Data()));
1941
1942         // retrieved file is renamed to make it unique
1943         TString localFileName = Form("%s_%s_%d_%s_%s.shuttle",
1944                                         GetSystemName(system), detector, GetCurrentRun(), id, sourceName.Data());
1945
1946
1947         // file retrieval from FXS
1948         UInt_t nRetries = 0;
1949         UInt_t maxRetries = 3;
1950         Bool_t result = kFALSE;
1951
1952         // copy!! if successful TSystem::Exec returns 0
1953         while(nRetries++ < maxRetries) {
1954                 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
1955                 result = RetrieveFile(system, filePath.Data(), localFileName.Data());
1956                 if(!result)
1957                 {
1958                         Log(detector, Form("GetFileName - Copy of file %s from %s FXS failed",
1959                                         filePath.Data(), GetSystemName(system)));
1960                         continue;
1961                 } else {
1962                         AliInfo(Form("File %s copied from %s FXS into %s/%s",
1963                                                 filePath.Data(), GetSystemName(system),
1964                                                 GetShuttleTempDir(), localFileName.Data()));
1965                 }
1966
1967                 if (fileChecksum.Length()>0)
1968                 {
1969                         // compare md5sum of local file with the one stored in the FXS DB
1970                         Int_t md5Comp = gSystem->Exec(Form("md5sum %s/%s |grep %s 2>&1 > /dev/null",
1971                                                 GetShuttleTempDir(), localFileName.Data(), fileChecksum.Data()));
1972
1973                         if (md5Comp != 0)
1974                         {
1975                                 Log(detector, Form("GetFileName - md5sum of file %s does not match with local copy!",
1976                                                         filePath.Data()));
1977                                 result = kFALSE;
1978                                 continue;
1979                         }
1980                 } else {
1981                         Log(fCurrentDetector, Form("GetFile - md5sum of file %s not set in %s database, skipping comparison",
1982                                                         filePath.Data(), GetSystemName(system)));
1983                 }
1984                 if (result) break;
1985         }
1986
1987         if(!result) return 0;
1988
1989         fFXSCalled[system]=kTRUE;
1990         TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, sourceName.Data()));
1991         fFXSlist[system].Add(fileParams);
1992
1993         static TString fullLocalFileName;
1994         fullLocalFileName = TString::Format("%s/%s", GetShuttleTempDir(), localFileName.Data());
1995
1996         AliInfo(Form("fullLocalFileName = %s", fullLocalFileName.Data()));
1997
1998         return fullLocalFileName.Data();
1999
2000 }
2001
2002 //______________________________________________________________________________________________
2003 Bool_t AliShuttle::RetrieveFile(UInt_t system, const char* fxsFileName, const char* localFileName)
2004 {
2005         //
2006         // Copies file from FXS to local Shuttle machine
2007         //
2008
2009         // check temp directory: trying to cd to temp; if it does not exist, create it
2010         AliDebug(2, Form("Copy file %s from %s FXS into %s/%s",
2011                         GetSystemName(system), fxsFileName, GetShuttleTempDir(), localFileName));
2012
2013         void* dir = gSystem->OpenDirectory(GetShuttleTempDir());
2014         if (dir == NULL) {
2015                 if (gSystem->mkdir(GetShuttleTempDir(), kTRUE)) {
2016                         AliError(Form("Can't open directory <%s>", GetShuttleTempDir()));
2017                         return kFALSE;
2018                 }
2019
2020         } else {
2021                 gSystem->FreeDirectory(dir);
2022         }
2023
2024         TString baseFXSFolder;
2025         if (system == kDAQ)
2026         {
2027                 baseFXSFolder = "FES/";
2028         }
2029         else if (system == kDCS)
2030         {
2031                 baseFXSFolder = "";
2032         }
2033         else if (system == kHLT)
2034         {
2035                 baseFXSFolder = "/opt/FXS";
2036         }
2037
2038
2039         TString command = Form("scp -oPort=%d -2 %s@%s:%s%s %s/%s",
2040                 fConfig->GetFXSPort(system),
2041                 fConfig->GetFXSUser(system),
2042                 fConfig->GetFXSHost(system),
2043                 baseFXSFolder.Data(),
2044                 fxsFileName,
2045                 GetShuttleTempDir(),
2046                 localFileName);
2047
2048         AliDebug(2, Form("%s",command.Data()));
2049
2050         Bool_t result = (gSystem->Exec(command.Data()) == 0);
2051
2052         return result;
2053 }
2054
2055 //______________________________________________________________________________________________
2056 TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
2057 {
2058         //
2059         // Get sources producing the condition file Id from file exchange servers
2060         // if id is NULL all sources are returned (distinct)
2061         //
2062         
2063         // check if test mode should simulate a FXS error
2064         if (fTestMode & kErrorFXSSources)
2065         {
2066                 Log(detector, Form("GetFileSources - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2067                 return 0;
2068         }
2069
2070
2071         if (system == kDCS)
2072         {
2073                 AliError("DCS system has only one source of data!");
2074                 return NULL;
2075         }
2076
2077         // check connection, in case connect
2078         if (!Connect(system))
2079         {
2080                 Log(detector, Form("GetFileSources - Couldn't connect to %s FXS database", GetSystemName(system)));
2081                 return NULL;
2082         }
2083
2084         TString sourceName = 0;
2085         if (system == kDAQ)
2086         {
2087                 sourceName = "DAQsource";
2088         } else if (system == kHLT)
2089         {
2090                 sourceName = "DDLnumbers";
2091         }
2092
2093         TString sqlQueryStart = Form("select distinct %s from %s where", sourceName.Data(), fConfig->GetFXSdbTable(system));
2094         TString whereClause = Form("run=%d and detector=\"%s\"",
2095                                 GetCurrentRun(), detector);
2096         if (id)
2097                 whereClause += Form(" and fileId=\"%s\"", id);
2098         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2099
2100         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2101
2102         // Query execution
2103         TSQLResult* aResult;
2104         aResult = fServer[system]->Query(sqlQuery);
2105         if (!aResult) {
2106                 Log(detector, Form("GetFileSources - Can't execute SQL query to %s database for id: %s",
2107                                 GetSystemName(system), id));
2108                 return 0;
2109         }
2110
2111         TList *list = new TList();
2112         list->SetOwner(1);
2113         
2114         if (aResult->GetRowCount() == 0)
2115         {
2116                 Log(detector,
2117                         Form("GetFileSources - No entry in %s FXS table for id: %s", GetSystemName(system), id));
2118                 delete aResult;
2119                 return list;
2120         }
2121
2122         TSQLRow* aRow;
2123
2124         while ((aRow = aResult->Next()))
2125         {
2126
2127                 TString source(aRow->GetField(0), aRow->GetFieldLength(0));
2128                 AliDebug(2, Form("%s = %s", sourceName.Data(), source.Data()));
2129                 list->Add(new TObjString(source));
2130                 delete aRow;
2131         }
2132
2133         delete aResult;
2134
2135         return list;
2136 }
2137
2138 //______________________________________________________________________________________________
2139 TList* AliShuttle::GetFileIDs(Int_t system, const char* detector, const char* source)
2140 {
2141         //
2142         // Get all ids of condition files produced by a given source from file exchange servers
2143         //
2144         
2145         // check if test mode should simulate a FXS error
2146         if (fTestMode & kErrorFXSSources)
2147         {
2148                 Log(detector, Form("GetFileIDs - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2149                 return 0;
2150         }
2151
2152         // check connection, in case connect
2153         if (!Connect(system))
2154         {
2155                 Log(detector, Form("GetFileIDs - Couldn't connect to %s FXS database", GetSystemName(system)));
2156                 return NULL;
2157         }
2158
2159         TString sourceName = 0;
2160         if (system == kDAQ)
2161         {
2162                 sourceName = "DAQsource";
2163         } else if (system == kHLT)
2164         {
2165                 sourceName = "DDLnumbers";
2166         }
2167
2168         TString sqlQueryStart = Form("select fileId from %s where", fConfig->GetFXSdbTable(system));
2169         TString whereClause = Form("run=%d and detector=\"%s\"",
2170                                 GetCurrentRun(), detector);
2171         if (sourceName.Length() > 0 && source)
2172                 whereClause += Form(" and %s=\"%s\"", sourceName.Data(), source);
2173         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2174
2175         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2176
2177         // Query execution
2178         TSQLResult* aResult;
2179         aResult = fServer[system]->Query(sqlQuery);
2180         if (!aResult) {
2181                 Log(detector, Form("GetFileIDs - Can't execute SQL query to %s database for source: %s",
2182                                 GetSystemName(system), source));
2183                 return 0;
2184         }
2185
2186         TList *list = new TList();
2187         list->SetOwner(1);
2188         
2189         if (aResult->GetRowCount() == 0)
2190         {
2191                 Log(detector,
2192                         Form("GetFileIDs - No entry in %s FXS table for source: %s", GetSystemName(system), source));
2193                 delete aResult;
2194                 return list;
2195         }
2196
2197         TSQLRow* aRow;
2198
2199         while ((aRow = aResult->Next()))
2200         {
2201
2202                 TString id(aRow->GetField(0), aRow->GetFieldLength(0));
2203                 AliDebug(2, Form("fileId = %s", id.Data()));
2204                 list->Add(new TObjString(id));
2205                 delete aRow;
2206         }
2207
2208         delete aResult;
2209
2210         return list;
2211 }
2212
2213 //______________________________________________________________________________________________
2214 Bool_t AliShuttle::Connect(Int_t system)
2215 {
2216         // Connect to MySQL Server of the system's FXS MySQL databases
2217         // DAQ Logbook, Shuttle Logbook and DAQ FXS db are on the same host
2218         //
2219
2220         // check connection: if already connected return
2221         if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
2222
2223         TString dbHost, dbUser, dbPass, dbName;
2224
2225         if (system < 3) // FXS db servers
2226         {
2227                 dbHost = Form("mysql://%s:%d", fConfig->GetFXSdbHost(system), fConfig->GetFXSdbPort(system));
2228                 dbUser = fConfig->GetFXSdbUser(system);
2229                 dbPass = fConfig->GetFXSdbPass(system);
2230                 dbName =   fConfig->GetFXSdbName(system);
2231         } else { // Run & Shuttle logbook servers
2232         // TODO Will the Shuttle logbook server be the same as the Run logbook server ???
2233                 dbHost = Form("mysql://%s:%d", fConfig->GetDAQlbHost(), fConfig->GetDAQlbPort());
2234                 dbUser = fConfig->GetDAQlbUser();
2235                 dbPass = fConfig->GetDAQlbPass();
2236                 dbName =   fConfig->GetDAQlbDB();
2237         }
2238
2239         fServer[system] = TSQLServer::Connect(dbHost.Data(), dbUser.Data(), dbPass.Data());
2240         if (!fServer[system] || !fServer[system]->IsConnected()) {
2241                 if(system < 3)
2242                 {
2243                 AliError(Form("Can't establish connection to FXS database for %s",
2244                                         AliShuttleInterface::GetSystemName(system)));
2245                 } else {
2246                 AliError("Can't establish connection to Run logbook.");
2247                 }
2248                 if(fServer[system]) delete fServer[system];
2249                 return kFALSE;
2250         }
2251
2252         // Get tables
2253         TSQLResult* aResult=0;
2254         switch(system){
2255                 case kDAQ:
2256                         aResult = fServer[kDAQ]->GetTables(dbName.Data());
2257                         break;
2258                 case kDCS:
2259                         aResult = fServer[kDCS]->GetTables(dbName.Data());
2260                         break;
2261                 case kHLT:
2262                         aResult = fServer[kHLT]->GetTables(dbName.Data());
2263                         break;
2264                 default:
2265                         aResult = fServer[3]->GetTables(dbName.Data());
2266                         break;
2267         }
2268
2269         delete aResult;
2270         return kTRUE;
2271 }
2272
2273 //______________________________________________________________________________________________
2274 Bool_t AliShuttle::UpdateTable()
2275 {
2276         //
2277         // Update FXS table filling time_processed field in all rows corresponding to current run and detector
2278         //
2279
2280         Bool_t result = kTRUE;
2281
2282         for (UInt_t system=0; system<3; system++)
2283         {
2284                 if(!fFXSCalled[system]) continue;
2285
2286                 // check connection, in case connect
2287                 if (!Connect(system))
2288                 {
2289                         Log(fCurrentDetector, Form("UpdateTable - Couldn't connect to %s FXS database", GetSystemName(system)));
2290                         result = kFALSE;
2291                         continue;
2292                 }
2293
2294                 TTimeStamp now; // now
2295
2296                 // Loop on FXS list entries
2297                 TIter iter(&fFXSlist[system]);
2298                 TObjString *aFXSentry=0;
2299                 while ((aFXSentry = dynamic_cast<TObjString*> (iter.Next())))
2300                 {
2301                         TString aFXSentrystr = aFXSentry->String();
2302                         TObjArray *aFXSarray = aFXSentrystr.Tokenize("#!?!#");
2303                         if (!aFXSarray || aFXSarray->GetEntries() != 2 )
2304                         {
2305                                 Log(fCurrentDetector, Form("UpdateTable - error updating %s FXS entry. Check string: <%s>",
2306                                         GetSystemName(system), aFXSentrystr.Data()));
2307                                 if(aFXSarray) delete aFXSarray;
2308                                 result = kFALSE;
2309                                 continue;
2310                         }
2311                         const char* fileId = ((TObjString*) aFXSarray->At(0))->GetName();
2312                         const char* source = ((TObjString*) aFXSarray->At(1))->GetName();
2313
2314                         TString whereClause;
2315                         if (system == kDAQ)
2316                         {
2317                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
2318                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
2319                         }
2320                         else if (system == kDCS)
2321                         {
2322                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\";",
2323                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId);
2324                         }
2325                         else if (system == kHLT)
2326                         {
2327                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\";",
2328                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
2329                         }
2330
2331                         delete aFXSarray;
2332
2333                         TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
2334                                                                 now.GetSec(), whereClause.Data());
2335
2336                         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2337
2338                         // Query execution
2339                         TSQLResult* aResult;
2340                         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2341                         if (!aResult)
2342                         {
2343                                 Log(fCurrentDetector, Form("UpdateTable - %s db: can't execute SQL query <%s>",
2344                                                                 GetSystemName(system), sqlQuery.Data()));
2345                                 result = kFALSE;
2346                                 continue;
2347                         }
2348                         delete aResult;
2349                 }
2350         }
2351
2352         return result;
2353 }
2354
2355 //______________________________________________________________________________________________
2356 Bool_t AliShuttle::UpdateTableFailCase()
2357 {
2358         // Update FXS table filling time_processed field in all rows corresponding to current run and detector
2359         // this is called in case the preprocessor is declared failed for the current run, because
2360         // the fields are updated only in case of success
2361
2362         Bool_t result = kTRUE;
2363
2364         for (UInt_t system=0; system<3; system++)
2365         {
2366                 // check connection, in case connect
2367                 if (!Connect(system))
2368                 {
2369                         Log(fCurrentDetector, Form("UpdateTableFailCase - Couldn't connect to %s FXS database",
2370                                                         GetSystemName(system)));
2371                         result = kFALSE;
2372                         continue;
2373                 }
2374
2375                 TTimeStamp now; // now
2376
2377                 // Loop on FXS list entries
2378
2379                 TString whereClause = Form("where run=%d and detector=\"%s\";",
2380                                                 GetCurrentRun(), fCurrentDetector.Data());
2381
2382
2383                 TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
2384                                                         now.GetSec(), whereClause.Data());
2385
2386                 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2387
2388                 // Query execution
2389                 TSQLResult* aResult;
2390                 aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2391                 if (!aResult)
2392                 {
2393                         Log(fCurrentDetector, Form("UpdateTableFailCase - %s db: can't execute SQL query <%s>",
2394                                                         GetSystemName(system), sqlQuery.Data()));
2395                         result = kFALSE;
2396                         continue;
2397                 }
2398                 delete aResult;
2399         }
2400
2401         return result;
2402 }
2403
2404 //______________________________________________________________________________________________
2405 Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
2406 {
2407         //
2408         // Update Shuttle logbook filling detector or shuttle_done column
2409         // ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
2410         //
2411
2412         // check connection, in case connect
2413         if(!Connect(3)){
2414                 Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook.");
2415                 return kFALSE;
2416         }
2417
2418         TString detName(detector);
2419         TString setClause;
2420         if(detName == "shuttle_done")
2421         {
2422                 setClause = "set shuttle_done=1";
2423
2424                 // Send the information to ML
2425                 TMonaLisaText  mlStatus("SHUTTLE_status", "Done");
2426
2427                 TList mlList;
2428                 mlList.Add(&mlStatus);
2429
2430                 fMonaLisa->SendParameters(&mlList);
2431         } else {
2432                 TString statusStr(status);
2433                 if(statusStr.Contains("done", TString::kIgnoreCase) ||
2434                    statusStr.Contains("failed", TString::kIgnoreCase)){
2435                         setClause = Form("set %s=\"%s\"", detector, status);
2436                 } else {
2437                         Log("SHUTTLE",
2438                                 Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s",
2439                                         status, detector));
2440                         return kFALSE;
2441                 }
2442         }
2443
2444         TString whereClause = Form("where run=%d", GetCurrentRun());
2445
2446         TString sqlQuery = Form("update %s %s %s",
2447                                         fConfig->GetShuttlelbTable(), setClause.Data(), whereClause.Data());
2448
2449         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2450
2451         // Query execution
2452         TSQLResult* aResult;
2453         aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
2454         if (!aResult) {
2455                 Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data()));
2456                 return kFALSE;
2457         }
2458         delete aResult;
2459
2460         return kTRUE;
2461 }
2462
2463 //______________________________________________________________________________________________
2464 Int_t AliShuttle::GetCurrentRun() const
2465 {
2466         //
2467         // Get current run from logbook entry
2468         //
2469
2470         return fLogbookEntry ? fLogbookEntry->GetRun() : -1;
2471 }
2472
2473 //______________________________________________________________________________________________
2474 UInt_t AliShuttle::GetCurrentStartTime() const
2475 {
2476         //
2477         // get current start time
2478         //
2479
2480         return fLogbookEntry ? fLogbookEntry->GetStartTime() : 0;
2481 }
2482
2483 //______________________________________________________________________________________________
2484 UInt_t AliShuttle::GetCurrentEndTime() const
2485 {
2486         //
2487         // get current end time from logbook entry
2488         //
2489
2490         return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
2491 }
2492
2493 //______________________________________________________________________________________________
2494 void AliShuttle::Log(const char* detector, const char* message)
2495 {
2496         //
2497         // Fill log string with a message
2498         //
2499
2500         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
2501         if (dir == NULL) {
2502                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE)) {
2503                         AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
2504                         return;
2505                 }
2506
2507         } else {
2508                 gSystem->FreeDirectory(dir);
2509         }
2510
2511         TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
2512         if (GetCurrentRun() >= 0) 
2513                 toLog += Form("run %d - ", GetCurrentRun());
2514         toLog += Form("%s", message);
2515
2516         AliInfo(toLog.Data());
2517         
2518         // if we redirect the log output already to the file, leave here
2519         if (fOutputRedirected && strcmp(detector, "SHUTTLE") != 0)
2520                 return;
2521
2522         TString fileName = GetLogFileName(detector);
2523         
2524         gSystem->ExpandPathName(fileName);
2525
2526         ofstream logFile;
2527         logFile.open(fileName, ofstream::out | ofstream::app);
2528
2529         if (!logFile.is_open()) {
2530                 AliError(Form("Could not open file %s", fileName.Data()));
2531                 return;
2532         }
2533
2534         logFile << toLog.Data() << "\n";
2535
2536         logFile.close();
2537 }
2538
2539 //______________________________________________________________________________________________
2540 TString AliShuttle::GetLogFileName(const char* detector) const
2541 {
2542         // 
2543         // returns the name of the log file for a given sub detector
2544         //
2545         
2546         TString fileName;
2547         
2548         if (GetCurrentRun() >= 0) 
2549                 fileName.Form("%s/%s_%d.log", GetShuttleLogDir(), detector, GetCurrentRun());
2550         else
2551                 fileName.Form("%s/%s.log", GetShuttleLogDir(), detector);
2552
2553         return fileName;
2554 }
2555
2556 //______________________________________________________________________________________________
2557 Bool_t AliShuttle::Collect(Int_t run)
2558 {
2559         //
2560         // Collects conditions data for all UNPROCESSED run written to DAQ LogBook in case of run = -1 (default)
2561         // If a dedicated run is given this run is processed
2562         //
2563         // In operational mode, this is the Shuttle function triggered by the EOR signal.
2564         //
2565
2566         if (run == -1)
2567                 Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs");
2568         else
2569                 Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run));
2570
2571         SetLastAction("Starting");
2572
2573         TString whereClause("where shuttle_done=0");
2574         if (run != -1)
2575                 whereClause += Form(" and run=%d", run);
2576
2577         TObjArray shuttleLogbookEntries;
2578         if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries))
2579         {
2580                 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
2581                 return kFALSE;
2582         }
2583
2584         if (shuttleLogbookEntries.GetEntries() == 0)
2585         {
2586                 if (run == -1)
2587                         Log("SHUTTLE","Collect - Found no UNPROCESSED runs in Shuttle logbook");
2588                 else
2589                         Log("SHUTTLE", Form("Collect - Run %d is already DONE "
2590                                                 "or it does not exist in Shuttle logbook", run));
2591                 return kTRUE;
2592         }
2593
2594         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
2595                 fFirstUnprocessed[iDet] = kTRUE;
2596
2597         if (run != -1)
2598         {
2599                 // query Shuttle logbook for earlier runs, check if some detectors are unprocessed,
2600                 // flag them into fFirstUnprocessed array
2601                 TString whereClause(Form("where shuttle_done=0 and run < %d", run));
2602                 TObjArray tmpLogbookEntries;
2603                 if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries))
2604                 {
2605                         Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
2606                         return kFALSE;
2607                 }
2608
2609                 TIter iter(&tmpLogbookEntries);
2610                 AliShuttleLogbookEntry* anEntry = 0;
2611                 while ((anEntry = dynamic_cast<AliShuttleLogbookEntry*> (iter.Next())))
2612                 {
2613                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
2614                         {
2615                                 if (anEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
2616                                 {
2617                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
2618                                                         anEntry->GetRun(), GetDetName(iDet)));
2619                                         fFirstUnprocessed[iDet] = kFALSE;
2620                                 }
2621                         }
2622
2623                 }
2624
2625         }
2626
2627         if (!RetrieveConditionsData(shuttleLogbookEntries))
2628         {
2629                 Log("SHUTTLE", "Collect - Process of at least one run failed");
2630                 return kFALSE;
2631         }
2632
2633         Log("SHUTTLE", "Collect - Requested run(s) successfully processed");
2634         return kTRUE;
2635 }
2636
2637 //______________________________________________________________________________________________
2638 Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries)
2639 {
2640         //
2641         // Retrieve conditions data for all runs that aren't processed yet
2642         //
2643
2644         Bool_t hasError = kFALSE;
2645
2646         TIter iter(&dateEntries);
2647         AliShuttleLogbookEntry* anEntry;
2648
2649         while ((anEntry = (AliShuttleLogbookEntry*) iter.Next())){
2650                 if (!Process(anEntry)){
2651                         hasError = kTRUE;
2652                 }
2653
2654                 // clean SHUTTLE temp directory
2655                 TString filename = Form("%s/*.shuttle", GetShuttleTempDir());
2656                 RemoveFile(filename.Data());
2657         }
2658
2659         return hasError == kFALSE;
2660 }
2661
2662 //______________________________________________________________________________________________
2663 ULong_t AliShuttle::GetTimeOfLastAction() const
2664 {
2665         //
2666         // Gets time of last action
2667         //
2668
2669         ULong_t tmp;
2670
2671         fMonitoringMutex->Lock();
2672
2673         tmp = fLastActionTime;
2674
2675         fMonitoringMutex->UnLock();
2676
2677         return tmp;
2678 }
2679
2680 //______________________________________________________________________________________________
2681 const TString AliShuttle::GetLastAction() const
2682 {
2683         //
2684         // returns a string description of the last action
2685         //
2686
2687         TString tmp;
2688
2689         fMonitoringMutex->Lock();
2690         
2691         tmp = fLastAction;
2692         
2693         fMonitoringMutex->UnLock();
2694
2695         return tmp;
2696 }
2697
2698 //______________________________________________________________________________________________
2699 void AliShuttle::SetLastAction(const char* action)
2700 {
2701         //
2702         // updates the monitoring variables
2703         //
2704
2705         fMonitoringMutex->Lock();
2706
2707         fLastAction = action;
2708         fLastActionTime = time(0);
2709         
2710         fMonitoringMutex->UnLock();
2711 }
2712
2713 //______________________________________________________________________________________________
2714 const char* AliShuttle::GetRunParameter(const char* param)
2715 {
2716         //
2717         // returns run parameter read from DAQ logbook
2718         //
2719
2720         if(!fLogbookEntry) {
2721                 AliError("No logbook entry!");
2722                 return 0;
2723         }
2724
2725         return fLogbookEntry->GetRunParameter(param);
2726 }
2727
2728 //______________________________________________________________________________________________
2729 AliCDBEntry* AliShuttle::GetFromOCDB(const char* detector, const AliCDBPath& path)
2730 {
2731         //
2732         // returns object from OCDB valid for current run
2733         //
2734
2735         if (fTestMode & kErrorOCDB)
2736         {
2737                 Log(detector, "GetFromOCDB - In TESTMODE - Simulating error with OCDB");
2738                 return 0;
2739         }
2740         
2741         AliCDBStorage *sto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
2742         if (!sto)
2743         {
2744                 Log(detector, "GetFromOCDB - Cannot activate main OCDB for query!");
2745                 return 0;
2746         }
2747
2748         return dynamic_cast<AliCDBEntry*> (sto->Get(path, GetCurrentRun()));
2749 }
2750
2751 //______________________________________________________________________________________________
2752 Bool_t AliShuttle::SendMail()
2753 {
2754         //
2755         // sends a mail to the subdetector expert in case of preprocessor error
2756         //
2757         
2758         if (fTestMode != kNone)
2759                 return kTRUE;
2760
2761         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
2762         if (dir == NULL)
2763         {
2764                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
2765                 {
2766                         AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
2767                         return kFALSE;
2768                 }
2769
2770         } else {
2771                 gSystem->FreeDirectory(dir);
2772         }
2773
2774         TString bodyFileName;
2775         bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
2776         gSystem->ExpandPathName(bodyFileName);
2777
2778         ofstream mailBody;
2779         mailBody.open(bodyFileName, ofstream::out);
2780
2781         if (!mailBody.is_open())
2782         {
2783                 AliError(Form("Could not open mail body file %s", bodyFileName.Data()));
2784                 return kFALSE;
2785         }
2786
2787         TString to="";
2788         TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
2789         TObjString *anExpert=0;
2790         while ((anExpert = (TObjString*) iterExperts.Next()))
2791         {
2792                 to += Form("%s,", anExpert->GetName());
2793         }
2794         to.Remove(to.Length()-1);
2795         AliDebug(2, Form("to: %s",to.Data()));
2796
2797         if (to.IsNull()) {
2798                 AliInfo("List of detector responsibles not yet set!");
2799                 return kFALSE;
2800         }
2801
2802         TString cc="alberto.colla@cern.ch";
2803
2804         TString subject = Form("%s Shuttle preprocessor FAILED in run %d !",
2805                                 fCurrentDetector.Data(), GetCurrentRun());
2806         AliDebug(2, Form("subject: %s", subject.Data()));
2807
2808         TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
2809         body += Form("SHUTTLE just detected that your preprocessor "
2810                         "failed processing run %d!!\n\n", GetCurrentRun());
2811         body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n", fCurrentDetector.Data());
2812         body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
2813         body += Form("Find the %s log for the current run on \n\n"
2814                 "\thttp://pcalishuttle01.cern.ch:8880/logs/%s_%d.log \n\n", 
2815                 fCurrentDetector.Data(), fCurrentDetector.Data(), GetCurrentRun());
2816         body += Form("The last 10 lines of %s log file are following:\n\n");
2817
2818         AliDebug(2, Form("Body begin: %s", body.Data()));
2819
2820         mailBody << body.Data();
2821         mailBody.close();
2822         mailBody.open(bodyFileName, ofstream::out | ofstream::app);
2823
2824         TString logFileName = Form("%s/%s_%d.log", GetShuttleLogDir(), fCurrentDetector.Data(), GetCurrentRun());
2825         TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
2826         if (gSystem->Exec(tailCommand.Data()))
2827         {
2828                 mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
2829         }
2830
2831         TString endBody = Form("------------------------------------------------------\n\n");
2832         endBody += Form("In case of problems please contact the SHUTTLE core team.\n\n");
2833         endBody += "Please do not answer this message directly, it is automatically generated.\n\n";
2834         endBody += "Greetings,\n\n \t\t\tthe SHUTTLE\n";
2835
2836         AliDebug(2, Form("Body end: %s", endBody.Data()));
2837
2838         mailBody << endBody.Data();
2839
2840         mailBody.close();
2841
2842         // send mail!
2843         TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
2844                                                 subject.Data(),
2845                                                 cc.Data(),
2846                                                 to.Data(),
2847                                                 bodyFileName.Data());
2848         AliDebug(2, Form("mail command: %s", mailCommand.Data()));
2849
2850         Bool_t result = gSystem->Exec(mailCommand.Data());
2851
2852         return result == 0;
2853 }
2854
2855 //______________________________________________________________________________________________
2856 const char* AliShuttle::GetRunType()
2857 {
2858         //
2859         // returns run type read from "run type" logbook
2860         //
2861
2862         if(!fLogbookEntry) {
2863                 AliError("No logbook entry!");
2864                 return 0;
2865         }
2866
2867         return fLogbookEntry->GetRunType();
2868 }
2869
2870 //______________________________________________________________________________________________
2871 void AliShuttle::SetShuttleTempDir(const char* tmpDir)
2872 {
2873         //
2874         // sets Shuttle temp directory
2875         //
2876
2877         fgkShuttleTempDir = gSystem->ExpandPathName(tmpDir);
2878 }
2879
2880 //______________________________________________________________________________________________
2881 void AliShuttle::SetShuttleLogDir(const char* logDir)
2882 {
2883         //
2884         // sets Shuttle log directory
2885         //
2886
2887         fgkShuttleLogDir = gSystem->ExpandPathName(logDir);
2888 }