]> git.uio.no Git - u/mrichter/AliRoot.git/blob - SHUTTLE/AliShuttle.cxx
1. Re-organization of function calls in TestPreprocessor to make it more meaningful.
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttle.cxx
1 /**************************************************************************
2  * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
3  *                                                                        *
4  * Author: The ALICE Off-line Project.                                    *
5  * Contributors are mentioned in the code where appropriate.              *
6  *                                                                        *
7  * Permission to use, copy, modify and distribute this software and its   *
8  * documentation strictly for non-commercial purposes is hereby granted   *
9  * without fee, provided that the above copyright notice appears in all   *
10  * copies and that both the copyright notice and this permission notice   *
11  * appear in the supporting documentation. The authors make no claims     *
12  * about the suitability of this software for any purpose. It is          *
13  * provided "as is" without express or implied warranty.                  *
14  **************************************************************************/
15
16 /*
17 $Log$
18 Revision 1.34  2007/04/04 10:33:36  jgrosseo
19 1) Storing of files to the Grid is now done _after_ your preprocessors succeeded. This is transparent, which means that you can still use the same functions (Store, StoreReferenceData) to store files to the Grid. However, the Shuttle first stores them locally and transfers them after the preprocessor finished. The return code of these two functions has changed from UInt_t to Bool_t which gives you the success of the storing.
20 In case of an error with the Grid, the Shuttle will retry the storing later, the preprocessor does not need to be run again.
21
22 2) The meaning of the return code of the preprocessor has changed. 0 is now success and any other value means failure. This value is stored in the log and you can use it to keep details about the error condition.
23
24 3) New function StoreReferenceFile to _directly_ store a file (without opening it) to the reference storage.
25
26 4) The memory usage of the preprocessor is monitored. If it exceeds 2 GB it is terminated.
27
28 5) New function AliPreprocessor::ProcessDCS(). If you do not need to have DCS data in all cases, you can skip the processing by implemting this function and returning kFALSE under certain conditions. E.g. if there is a certain run type.
29 If you always need DCS data (like before), you do not need to implement it.
30
31 6) The run type has been added to the monitoring page
32
33 Revision 1.33  2007/04/03 13:56:01  acolla
34 Grid Storage at the end of preprocessing. Added virtual method to disable DCS query according to the
35 run type.
36
37 Revision 1.32  2007/02/28 10:41:56  acolla
38 Run type field added in SHUTTLE framework. Run type is read from "run type" logbook and retrieved by
39 AliPreprocessor::GetRunType() function.
40 Added some ldap definition files.
41
42 Revision 1.30  2007/02/13 11:23:21  acolla
43 Moved getters and setters of Shuttle's main OCDB/Reference, local
44 OCDB/Reference, temp and log folders to AliShuttleInterface
45
46 Revision 1.27  2007/01/30 17:52:42  jgrosseo
47 adding monalisa monitoring
48
49 Revision 1.26  2007/01/23 19:20:03  acolla
50 Removed old ldif files, added TOF, MCH ldif files. Added some options in
51 AliShuttleConfig::Print. Added in Ali Shuttle: SetShuttleTempDir and
52 SetShuttleLogDir
53
54 Revision 1.25  2007/01/15 19:13:52  acolla
55 Moved some AliInfo to AliDebug in SendMail function
56
57 Revision 1.21  2006/12/07 08:51:26  jgrosseo
58 update (alberto):
59 table, db names in ldap configuration
60 added GRP preprocessor
61 DCS data can also be retrieved by data point
62
63 Revision 1.20  2006/11/16 16:16:48  jgrosseo
64 introducing strict run ordering flag
65 removed giving preprocessor name to preprocessor, they have to know their name themselves ;-)
66
67 Revision 1.19  2006/11/06 14:23:04  jgrosseo
68 major update (Alberto)
69 o) reading of run parameters from the logbook
70 o) online offline naming conversion
71 o) standalone DCSclient package
72
73 Revision 1.18  2006/10/20 15:22:59  jgrosseo
74 o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
75 o) Merging Collect, CollectAll, CollectNew function
76 o) Removing implementation of empty copy constructors (declaration still there!)
77
78 Revision 1.17  2006/10/05 16:20:55  jgrosseo
79 adapting to new CDB classes
80
81 Revision 1.16  2006/10/05 15:46:26  jgrosseo
82 applying to the new interface
83
84 Revision 1.15  2006/10/02 16:38:39  jgrosseo
85 update (alberto):
86 fixed memory leaks
87 storing of objects that failed to be stored to the grid before
88 interfacing of shuttle status table in daq system
89
90 Revision 1.14  2006/08/29 09:16:05  jgrosseo
91 small update
92
93 Revision 1.13  2006/08/15 10:50:00  jgrosseo
94 effc++ corrections (alberto)
95
96 Revision 1.12  2006/08/08 14:19:29  jgrosseo
97 Update to shuttle classes (Alberto)
98
99 - Possibility to set the full object's path in the Preprocessor's and
100 Shuttle's  Store functions
101 - Possibility to extend the object's run validity in the same classes
102 ("startValidity" and "validityInfinite" parameters)
103 - Implementation of the StoreReferenceData function to store reference
104 data in a dedicated CDB storage.
105
106 Revision 1.11  2006/07/21 07:37:20  jgrosseo
107 last run is stored after each run
108
109 Revision 1.10  2006/07/20 09:54:40  jgrosseo
110 introducing status management: The processing per subdetector is divided into several steps,
111 after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
112 can keep track of the number of failures and skips further processing after a certain threshold is
113 exceeded. These thresholds can be configured in LDAP.
114
115 Revision 1.9  2006/07/19 10:09:55  jgrosseo
116 new configuration, accesst to DAQ FES (Alberto)
117
118 Revision 1.8  2006/07/11 12:44:36  jgrosseo
119 adding parameters for extended validity range of data produced by preprocessor
120
121 Revision 1.7  2006/07/10 14:37:09  jgrosseo
122 small fix + todo comment
123
124 Revision 1.6  2006/07/10 13:01:41  jgrosseo
125 enhanced storing of last sucessfully processed run (alberto)
126
127 Revision 1.5  2006/07/04 14:59:57  jgrosseo
128 revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
129
130 Revision 1.4  2006/06/12 09:11:16  jgrosseo
131 coding conventions (Alberto)
132
133 Revision 1.3  2006/06/06 14:26:40  jgrosseo
134 o) removed files that were moved to STEER
135 o) shuttle updated to follow the new interface (Alberto)
136
137 Revision 1.2  2006/03/07 07:52:34  hristov
138 New version (B.Yordanov)
139
140 Revision 1.6  2005/11/19 17:19:14  byordano
141 RetrieveDATEEntries and RetrieveConditionsData added
142
143 Revision 1.5  2005/11/19 11:09:27  byordano
144 AliShuttle declaration added
145
146 Revision 1.4  2005/11/17 17:47:34  byordano
147 TList changed to TObjArray
148
149 Revision 1.3  2005/11/17 14:43:23  byordano
150 import to local CVS
151
152 Revision 1.1.1.1  2005/10/28 07:33:58  hristov
153 Initial import as subdirectory in AliRoot
154
155 Revision 1.2  2005/09/13 08:41:15  byordano
156 default startTime endTime added
157
158 Revision 1.4  2005/08/30 09:13:02  byordano
159 some docs added
160
161 Revision 1.3  2005/08/29 21:15:47  byordano
162 some docs added
163
164 */
165
166 //
167 // This class is the main manager for AliShuttle.
168 // It organizes the data retrieval from DCS and calls the
169 // interface methods of AliPreprocessor.
170 // For every detector in AliShuttleConfig (see AliShuttleConfig),
171 // data for its set of aliases is retrieved. If there is a registered
172 // AliPreprocessor for this detector then it will be used
173 // according to the schema (see AliPreprocessor).
174 // If there isn't a registered AliPreprocessor then the retrieved
175 // data is stored automatically to the underlying AliCDBStorage.
176 // The alias name is used for detSpec.
177 //
178
179 #include "AliShuttle.h"
180
181 #include "AliCDBManager.h"
182 #include "AliCDBStorage.h"
183 #include "AliCDBId.h"
184 #include "AliCDBRunRange.h"
185 #include "AliCDBPath.h"
186 #include "AliCDBEntry.h"
187 #include "AliShuttleConfig.h"
188 #include "DCSClient/AliDCSClient.h"
189 #include "AliLog.h"
190 #include "AliPreprocessor.h"
191 #include "AliShuttleStatus.h"
192 #include "AliShuttleLogbookEntry.h"
193
194 #include <TSystem.h>
195 #include <TObject.h>
196 #include <TString.h>
197 #include <TTimeStamp.h>
198 #include <TObjString.h>
199 #include <TSQLServer.h>
200 #include <TSQLResult.h>
201 #include <TSQLRow.h>
202 #include <TMutex.h>
203 #include <TSystemDirectory.h>
204 #include <TSystemFile.h>
205 #include <TFileMerger.h>
206 #include <TGrid.h>
207 #include <TGridResult.h>
208
209 #include <TMonaLisaWriter.h>
210
211 #include <fstream>
212
213 #include <sys/types.h>
214 #include <sys/wait.h>
215
216 ClassImp(AliShuttle)
217
218 //______________________________________________________________________________________________
219 AliShuttle::AliShuttle(const AliShuttleConfig* config,
220                 UInt_t timeout, Int_t retries):
221 fConfig(config),
222 fTimeout(timeout), fRetries(retries),
223 fPreprocessorMap(),
224 fLogbookEntry(0),
225 fCurrentDetector(),
226 fStatusEntry(0),
227 fMonitoringMutex(0),
228 fLastActionTime(0),
229 fLastAction(),
230 fMonaLisa(0),
231 fTestMode(kNone),
232 fReadTestMode(kFALSE)
233 {
234         //
235         // config: AliShuttleConfig used
236         // timeout: timeout used for AliDCSClient connection
237         // retries: the number of retries in case of connection error.
238         //
239
240         if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
241         for(int iSys=0;iSys<4;iSys++) {
242                 fServer[iSys]=0;
243                 if (iSys < 3)
244                         fFXSlist[iSys].SetOwner(kTRUE);
245         }
246         fPreprocessorMap.SetOwner(kTRUE);
247
248         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
249                 fFirstUnprocessed[iDet] = kFALSE;
250
251         fMonitoringMutex = new TMutex();
252 }
253
254 //______________________________________________________________________________________________
255 AliShuttle::~AliShuttle()
256 {
257         //
258         // destructor
259         //
260
261         fPreprocessorMap.DeleteAll();
262         for(int iSys=0;iSys<4;iSys++)
263                 if(fServer[iSys]) {
264                         fServer[iSys]->Close();
265                         delete fServer[iSys];
266                         fServer[iSys] = 0;
267                 }
268
269         if (fStatusEntry){
270                 delete fStatusEntry;
271                 fStatusEntry = 0;
272         }
273         
274         if (fMonitoringMutex) 
275         {
276                 delete fMonitoringMutex;
277                 fMonitoringMutex = 0;
278         }
279 }
280
281 //______________________________________________________________________________________________
282 void AliShuttle::RegisterPreprocessor(AliPreprocessor* preprocessor)
283 {
284         //
285         // Registers new AliPreprocessor.
286         // It uses GetName() for indentificator of the pre processor.
287         // The pre processor is registered it there isn't any other
288         // with the same identificator (GetName()).
289         //
290
291         const char* detName = preprocessor->GetName();
292         if(GetDetPos(detName) < 0)
293                 AliFatal(Form("********** !!!!! Invalid detector name: %s !!!!! **********", detName));
294
295         if (fPreprocessorMap.GetValue(detName)) {
296                 AliWarning(Form("AliPreprocessor %s is already registered!", detName));
297                 return;
298         }
299
300         fPreprocessorMap.Add(new TObjString(detName), preprocessor);
301 }
302 //______________________________________________________________________________________________
303 Bool_t AliShuttle::Store(const AliCDBPath& path, TObject* object,
304                 AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite)
305 {
306         // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for
307         // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored
308         // using this function. Use StoreReferenceData instead!
309         // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
310         // finishes the data are transferred to the main storage (Grid).
311
312         return StoreLocally(fgkLocalCDB, path, object, metaData, validityStart, validityInfinite);
313 }
314
315 //______________________________________________________________________________________________
316 Bool_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData)
317 {
318         // Stores a CDB object in the storage for reference data. This objects will not be available during
319         // offline reconstrunction. Use this function for reference data only!
320         // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
321         // finishes the data are transferred to the main storage (Grid).
322
323         return StoreLocally(fgkLocalRefStorage, path, object, metaData);
324 }
325
326 //______________________________________________________________________________________________
327 Bool_t AliShuttle::StoreLocally(const TString& localUri,
328                         const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
329                         Int_t validityStart, Bool_t validityInfinite)
330 {
331         // Store object temporarily in local storage. Parameters are passed by Store and StoreReferenceData functions.
332         // when the preprocessor finishes the data are transferred to the main storage (Grid).
333         // The parameters are:
334         //   1) Uri of the backup storage (Local)
335         //   2) the object's path.
336         //   3) the object to be stored
337         //   4) the metaData to be associated with the object
338         //   5) the validity start run number w.r.t. the current run,
339         //      if the data is valid only for this run leave the default 0
340         //   6) specifies if the calibration data is valid for infinity (this means until updated),
341         //      typical for calibration runs, the default is kFALSE
342         //
343         // returns 0 if fail, 1 otherwise
344
345         if (fTestMode & kErrorStorage)
346         {
347                 Log(fCurrentDetector, "StoreLocally - In TESTMODE - Simulating error while storing locally");
348                 return kFALSE;
349         }
350         
351         const char* cdbType = (localUri == fgkLocalCDB) ? "CDB" : "Reference";
352
353         Int_t firstRun = GetCurrentRun() - validityStart;
354         if(firstRun < 0) {
355                 AliWarning("First valid run happens to be less than 0! Setting it to 0.");
356                 firstRun=0;
357         }
358
359         Int_t lastRun = -1;
360         if(validityInfinite) {
361                 lastRun = AliCDBRunRange::Infinity();
362         } else {
363                 lastRun = GetCurrentRun();
364         }
365
366         // Version is set to current run, it will be used later to transfer data to Grid
367         AliCDBId id(path, firstRun, lastRun, GetCurrentRun(), -1);
368
369         if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){
370                 TObjString runUsed = Form("%d", GetCurrentRun());
371                 metaData->SetProperty("RunUsed(TObjString)", runUsed.Clone());
372         }
373
374         Bool_t result = kFALSE;
375
376         if (!(AliCDBManager::Instance()->GetStorage(localUri))) {
377                 Log("SHUTTLE", Form("StoreLocally - Cannot activate local %s storage", cdbType));
378         } else {
379                 result = AliCDBManager::Instance()->GetStorage(localUri)
380                                         ->Put(object, id, metaData);
381         }
382
383         if(!result) {
384
385                 Log(fCurrentDetector, Form("StoreLocally - Can't store object <%s>!", id.ToString().Data()));
386         }
387
388         return result;
389 }
390
391 //______________________________________________________________________________________________
392 Bool_t AliShuttle::StoreOCDB()
393 {
394         //
395         // Called when preprocessor ends successfully or when previous storage attempt failed (kStoreError status)
396         // Calls underlying StoreOCDB(const char*) function twice, for OCDB and Reference storage.
397         // Then calls StoreRefFilesToGrid to store reference files. 
398         //
399         
400         if (fTestMode & kErrorGrid)
401         {
402                 Log("SHUTTLE", "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
403                 Log(fCurrentDetector, "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
404                 return kFALSE;
405         }
406         
407         AliInfo("Storing OCDB data ...");
408         Bool_t resultCDB = StoreOCDB(fgkMainCDB);
409
410         AliInfo("Storing reference data ...");
411         Bool_t resultRef = StoreOCDB(fgkMainRefStorage);
412         
413         AliInfo("Storing reference files ...");
414         Bool_t resultRefFiles = StoreRefFilesToGrid();
415         
416         return resultCDB && resultRef && resultRefFiles;
417 }
418
419 //______________________________________________________________________________________________
420 Bool_t AliShuttle::StoreOCDB(const TString& gridURI)
421 {
422         //
423         // Called by StoreOCDB(), performs actual storage to the main OCDB and reference storages (Grid)
424         //
425
426         TObjArray* gridIds=0;
427
428         Bool_t result = kTRUE;
429
430         const char* type = 0;
431         TString localURI;
432         if(gridURI == fgkMainCDB) {
433                 type = "OCDB";
434                 localURI = fgkLocalCDB;
435         } else if(gridURI == fgkMainRefStorage) {
436                 type = "reference";
437                 localURI = fgkLocalRefStorage;
438         } else {
439                 AliError(Form("Invalid storage URI: %s", gridURI.Data()));
440                 return kFALSE;
441         }
442
443         AliCDBManager* man = AliCDBManager::Instance();
444
445         AliCDBStorage *gridSto = man->GetStorage(gridURI);
446         if(!gridSto) {
447                 Log("SHUTTLE",
448                         Form("StoreOCDB - cannot activate main %s storage", type));
449                 return kFALSE;
450         }
451
452         gridIds = gridSto->GetQueryCDBList();
453
454         // get objects previously stored in local CDB
455         AliCDBStorage *localSto = man->GetStorage(localURI);
456         if(!localSto) {
457                 Log("SHUTTLE",
458                         Form("StoreOCDB - cannot activate local %s storage", type));
459                 return kFALSE;
460         }
461         AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*");
462         // Local objects were stored with current run as Grid version!
463         TList* localEntries = localSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun());
464         localEntries->SetOwner(1);
465
466         // loop on local stored objects
467         TIter localIter(localEntries);
468         AliCDBEntry *aLocEntry = 0;
469         while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){
470                 aLocEntry->SetOwner(1);
471                 AliCDBId aLocId = aLocEntry->GetId();
472                 aLocEntry->SetVersion(-1);
473                 aLocEntry->SetSubVersion(-1);
474
475                 // If local object is valid up to infinity we store it only if it is
476                 // the first unprocessed run!
477                 if (aLocId.GetLastRun() == AliCDBRunRange::Infinity() &&
478                         !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
479                 {
480                         Log("SHUTTLE", Form("StoreOCDB - %s: object %s has validity infinite but "
481                                                 "there are previous unprocessed runs!",
482                                                 fCurrentDetector.Data(), aLocId.GetPath().Data()));
483                         continue;
484                 }
485
486                 // loop on Grid valid Id's
487                 Bool_t store = kTRUE;
488                 TIter gridIter(gridIds);
489                 AliCDBId* aGridId = 0;
490                 while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
491                         if(aGridId->GetPath() != aLocId.GetPath()) continue;
492                         // skip all objects valid up to infinity
493                         if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
494                         // if we get here, it means there's already some more recent object stored on Grid!
495                         store = kFALSE;
496                         break;
497                 }
498
499                 // If we get here, the file can be stored!
500                 Bool_t storeOk = gridSto->Put(aLocEntry);
501                 if(!store || storeOk){
502
503                         if (!store)
504                         {
505                                 Log(fCurrentDetector.Data(),
506                                         Form("StoreOCDB - A more recent object already exists in %s storage: <%s>",
507                                                 type, aGridId->ToString().Data()));
508                         } else {
509                                 Log("SHUTTLE",
510                                         Form("StoreOCDB - Object <%s> successfully put into %s storage",
511                                                 aLocId.ToString().Data(), type));
512                         }
513
514                         // removing local filename...
515                         TString filename;
516                         localSto->IdToFilename(aLocId, filename);
517                         AliInfo(Form("Removing local file %s", filename.Data()));
518                         RemoveFile(filename.Data());
519                         continue;
520                 } else  {
521                         Log("SHUTTLE",
522                                 Form("StoreOCDB - Grid %s storage of object <%s> failed",
523                                         type, aLocId.ToString().Data()));
524                         result = kFALSE;
525                 }
526         }
527         localEntries->Clear();
528
529         return result;
530 }
531
532 //______________________________________________________________________________________________
533 Bool_t AliShuttle::StoreReferenceFile(const char* detector, const char* localFile, const char* gridFileName)
534 {
535         //
536         // Stores reference file directly (without opening it). This function stores the file locally
537         // renaming it to #runNumber_gridFileName.
538         //
539         
540         if (fTestMode & kErrorStorage)
541         {
542                 Log(fCurrentDetector, "StoreReferenceFile - In TESTMODE - Simulating error while storing locally");
543                 return kFALSE;
544         }
545         
546         AliCDBManager* man = AliCDBManager::Instance();
547         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
548         
549         TString localBaseFolder = sto->GetBaseFolder();
550         
551         TString targetDir;
552         targetDir.Form("%s/%s", localBaseFolder.Data(), detector);
553         
554         TString target;
555         target.Form("%s/%d_%s", targetDir.Data(), GetCurrentRun(), gridFileName);
556         
557         Int_t result = gSystem->GetPathInfo(targetDir, 0, (Long64_t*) 0, 0, 0);
558         if (result)
559         {
560                 result = gSystem->mkdir(targetDir, kTRUE);
561                 if (result != 0)
562                 {
563                         Log("SHUTTLE", Form("StoreReferenceFile - Error creating base directory %s", targetDir.Data()));
564                         return kFALSE;
565                 }
566         }
567                 
568         result = gSystem->CopyFile(localFile, target);
569
570         if (result == 0)
571         {
572                 Log("SHUTTLE", Form("StoreReferenceFile - Stored file %s locally to %s", localFile, target.Data()));
573                 return kTRUE;
574         }
575         else
576         {
577                 Log("SHUTTLE", Form("StoreReferenceFile - Storing file %s locally to %s failed", localFile, target.Data()));
578                 return kFALSE;
579         }       
580 }
581
582 //______________________________________________________________________________________________
583 Bool_t AliShuttle::StoreRefFilesToGrid()
584 {
585         //
586         // Transfers the reference file to the Grid.
587         // The final full path of the file is:
588         // gridBaseReferenceFolder/DET/#runNumber_gridFileName
589         //
590         
591         AliCDBManager* man = AliCDBManager::Instance();
592         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
593         if (!sto)
594                 return kFALSE;
595         TString localBaseFolder = sto->GetBaseFolder();
596                 
597         TString dir;
598         dir.Form("%s/%s", localBaseFolder.Data(), fCurrentDetector.Data());
599         
600         AliCDBStorage* gridSto = man->GetStorage(fgkMainRefStorage);
601         if (!gridSto)
602                 return kFALSE;
603         TString gridBaseFolder = gridSto->GetBaseFolder();
604         TString alienDir;
605         alienDir.Form("%s%s", gridBaseFolder.Data(), fCurrentDetector.Data());
606         
607         if(!gGrid) 
608                 return kFALSE;
609         
610         // check that DET folder exists, otherwise create it
611         TGridResult* result = gGrid->Ls(alienDir.Data());
612         
613         if(!result)
614                 return kFALSE;
615         
616         if(!result->GetFileName(0)) {
617                 if(!gGrid->Mkdir(alienDir.Data(),"",0)){
618                         Log("SHUTTLE", Form("StoreRefFilesToGrid - Cannot create directory %s",
619                                         alienDir.Data()));
620                         return kFALSE;
621                 }
622                 
623         }
624
625         TString begin;
626         begin.Form("%d_", GetCurrentRun());
627         
628         TSystemDirectory* baseDir = new TSystemDirectory("/", dir);
629         TList* dirList            = baseDir->GetListOfFiles();
630         if (!dirList)
631                 return kTRUE;
632                 
633         Int_t nDirs               = dirList->GetEntries();
634         
635         Bool_t success = kTRUE;
636         
637         for (Int_t iDir=0; iDir<nDirs; ++iDir)
638         {
639                 TSystemFile* entry = dynamic_cast<TSystemFile*> (dirList->At(iDir));
640                 if (!entry)
641                         continue;
642                         
643                 if (entry->IsDirectory())
644                         continue;
645                         
646                 TString fileName(entry->GetName());
647                 if (!fileName.BeginsWith(begin))
648                         continue;
649                         
650                 TString fullLocalPath;
651                 fullLocalPath.Form("%s/%s", dir.Data(), fileName.Data());
652                 
653                 TString fullGridPath;
654                 fullGridPath.Form("alien://%s/%s", alienDir.Data(), fileName.Data());
655
656                 Log("SHUTTLE", Form("StoreRefFilesToGrid - Copying local file %s to %s", fullLocalPath.Data(), fullGridPath.Data()));
657                 
658                 TFileMerger fileMerger;
659                 Bool_t result = fileMerger.Cp(fullLocalPath, fullGridPath);
660                 
661                 if (result)
662                 {
663                         Log("SHUTTLE", Form("StoreRefFilesToGrid - Copying local file %s to %s succeeded", fullLocalPath.Data(), fullGridPath.Data()));
664                         RemoveFile(fullLocalPath);
665                 }
666                 else
667                 {
668                         Log("SHUTTLE", Form("StoreRefFilesToGrid - Copying local file %s to %s failed", fullLocalPath.Data(), fullGridPath.Data()));
669                         success = kFALSE;
670                 }
671         }
672         
673         delete baseDir;
674         
675         return success;
676 }
677
678 //______________________________________________________________________________________________
679 void AliShuttle::CleanLocalStorage(const TString& uri)
680 {
681         //
682         // Called in case the preprocessor is declared failed. Remove remaining objects from the local storages.
683         //
684
685         const char* type = 0;
686         if(uri == fgkLocalCDB) {
687                 type = "OCDB";
688         } else if(uri == fgkLocalRefStorage) {
689                 type = "reference";
690         } else {
691                 AliError(Form("Invalid storage URI: %s", uri.Data()));
692                 return;
693         }
694
695         AliCDBManager* man = AliCDBManager::Instance();
696
697         // open local storage
698         AliCDBStorage *localSto = man->GetStorage(uri);
699         if(!localSto) {
700                 Log("SHUTTLE",
701                         Form("CleanLocalStorage - cannot activate local %s storage", type));
702                 return;
703         }
704
705         TString filename(Form("%s/%s/*/Run*_v%d_s*.root",
706                 localSto->GetBaseFolder().Data(), fCurrentDetector.Data(), GetCurrentRun()));
707
708         AliInfo(Form("filename = %s", filename.Data()));
709
710         AliInfo(Form("Removing remaining local files from run %d and detector %s ...",
711                 GetCurrentRun(), fCurrentDetector.Data()));
712
713         RemoveFile(filename.Data());
714
715 }
716
717 //______________________________________________________________________________________________
718 void AliShuttle::RemoveFile(const char* filename)
719 {
720         //
721         // removes local file
722         //
723
724         TString command(Form("rm -f %s", filename));
725
726         Int_t result = gSystem->Exec(command.Data());
727         if(result != 0)
728         {
729                 Log("SHUTTLE", Form("RemoveFile - %s: Cannot remove file %s!",
730                         fCurrentDetector.Data(), filename));
731         }
732 }
733
734 //______________________________________________________________________________________________
735 AliShuttleStatus* AliShuttle::ReadShuttleStatus()
736 {
737         //
738         // Reads the AliShuttleStatus from the CDB
739         //
740
741         if (fStatusEntry){
742                 delete fStatusEntry;
743                 fStatusEntry = 0;
744         }
745
746         fStatusEntry = AliCDBManager::Instance()->GetStorage(GetLocalCDB())
747                 ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
748
749         if (!fStatusEntry) return 0;
750         fStatusEntry->SetOwner(1);
751
752         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
753         if (!status) {
754                 AliError("Invalid object stored to CDB!");
755                 return 0;
756         }
757
758         return status;
759 }
760
761 //______________________________________________________________________________________________
762 Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status)
763 {
764         //
765         // writes the status for one subdetector
766         //
767
768         if (fStatusEntry){
769                 delete fStatusEntry;
770                 fStatusEntry = 0;
771         }
772
773         Int_t run = GetCurrentRun();
774
775         AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
776
777         fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
778         fStatusEntry->SetOwner(1);
779
780         UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
781
782         if (!result) {
783                 Log("SHUTTLE", Form("WriteShuttleStatus - Failed for %s, run %d",
784                                                 fCurrentDetector.Data(), run));
785                 return kFALSE;
786         }
787         
788         SendMLInfo();
789
790         return kTRUE;
791 }
792
793 //______________________________________________________________________________________________
794 void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount)
795 {
796         //
797         // changes the AliShuttleStatus for the given detector and run to the given status
798         //
799
800         if (!fStatusEntry){
801                 AliError("UNEXPECTED: fStatusEntry empty");
802                 return;
803         }
804
805         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
806
807         if (!status){
808                 Log("SHUTTLE", "UNEXPECTED: status could not be read from current CDB entry");
809                 return;
810         }
811
812         TString actionStr = Form("UpdateShuttleStatus - %s: Changing state from %s to %s",
813                                 fCurrentDetector.Data(),
814                                 status->GetStatusName(),
815                                 status->GetStatusName(newStatus));
816         Log("SHUTTLE", actionStr);
817         SetLastAction(actionStr);
818
819         status->SetStatus(newStatus);
820         if (increaseCount) status->IncreaseCount();
821
822         AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
823
824         SendMLInfo();
825 }
826
827 //______________________________________________________________________________________________
828 void AliShuttle::SendMLInfo()
829 {
830         //
831         // sends ML information about the current status of the current detector being processed
832         //
833         
834         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
835         
836         if (!status){
837                 Log("SHUTTLE", "SendMLInfo - UNEXPECTED: status could not be read from current CDB entry");
838                 return;
839         }
840         
841         TMonaLisaText  mlStatus(Form("%s_status", fCurrentDetector.Data()), status->GetStatusName());
842         TMonaLisaValue mlRetryCount(Form("%s_count", fCurrentDetector.Data()), status->GetCount());
843
844         TList mlList;
845         mlList.Add(&mlStatus);
846         mlList.Add(&mlRetryCount);
847
848         fMonaLisa->SendParameters(&mlList);
849 }
850
851 //______________________________________________________________________________________________
852 Bool_t AliShuttle::ContinueProcessing()
853 {
854         // this function reads the AliShuttleStatus information from CDB and
855         // checks if the processing should be continued
856         // if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
857
858         if (!fConfig->HostProcessDetector(fCurrentDetector)) return kFALSE;
859
860         AliPreprocessor* aPreprocessor =
861                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
862         if (!aPreprocessor)
863         {
864                 AliInfo(Form("%s: no preprocessor registered", fCurrentDetector.Data()));
865                 return kFALSE;
866         }
867
868         AliShuttleLogbookEntry::Status entryStatus =
869                 fLogbookEntry->GetDetectorStatus(fCurrentDetector);
870
871         if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
872                 AliInfo(Form("ContinueProcessing - %s is %s",
873                                 fCurrentDetector.Data(),
874                                 fLogbookEntry->GetDetectorStatusName(entryStatus)));
875                 return kFALSE;
876         }
877
878         // if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state
879
880         // check if current run is first unprocessed run for current detector
881         if (fConfig->StrictRunOrder(fCurrentDetector) &&
882                 !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
883         {
884                 Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering but this is not the first unprocessed run!"));
885                 return kFALSE;
886         }
887
888         AliShuttleStatus* status = ReadShuttleStatus();
889         if (!status) {
890                 // first time
891                 Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
892                                 fCurrentDetector.Data()));
893                 status = new AliShuttleStatus(AliShuttleStatus::kStarted);
894                 return WriteShuttleStatus(status);
895         }
896
897         // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
898         // If it happens it may mean Logbook updating failed... let's do it now!
899         if (status->GetStatus() == AliShuttleStatus::kDone ||
900             status->GetStatus() == AliShuttleStatus::kFailed){
901                 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
902                                         fCurrentDetector.Data(),
903                                         status->GetStatusName(status->GetStatus())));
904                 UpdateShuttleLogbook(fCurrentDetector.Data(),
905                                         status->GetStatusName(status->GetStatus()));
906                 return kFALSE;
907         }
908
909         if (status->GetStatus() == AliShuttleStatus::kStoreError) {
910                 Log("SHUTTLE",
911                         Form("ContinueProcessing - %s: Grid storage of one or more objects failed. Trying again now",
912                                 fCurrentDetector.Data()));
913                 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
914                 if (StoreOCDB()){
915                         Log("SHUTTLE", Form("ContinueProcessing - %s: all objects successfully stored into main storage",
916                                 fCurrentDetector.Data()));
917                         UpdateShuttleStatus(AliShuttleStatus::kDone);
918                         UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE");
919                 } else {
920                         Log("SHUTTLE",
921                                 Form("ContinueProcessing - %s: Grid storage failed again",
922                                         fCurrentDetector.Data()));
923                         UpdateShuttleStatus(AliShuttleStatus::kStoreError);
924                 }
925                 return kFALSE;
926         }
927
928         // if we get here, there is a restart
929         Bool_t cont = kFALSE;
930
931         // abort conditions
932         if (status->GetCount() >= fConfig->GetMaxRetries()) {
933                 Log("SHUTTLE", Form("ContinueProcessing - %s failed %d times in status %s - "
934                                 "Updating Shuttle Logbook", fCurrentDetector.Data(),
935                                 status->GetCount(), status->GetStatusName()));
936                 UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
937                 UpdateShuttleStatus(AliShuttleStatus::kFailed);
938
939                 // there may still be objects in local OCDB and reference storage
940                 // and FXS databases may be not updated: do it now!
941                 
942                 // TODO Currently disabled, we want to keep files in case of failure!
943                 // CleanLocalStorage(fgkLocalCDB);
944                 // CleanLocalStorage(fgkLocalRefStorage);
945                 // UpdateTableFailCase();
946                 
947                 // Send mail to detector expert!
948                 AliInfo(Form("Sending mail to %s expert...", fCurrentDetector.Data()));
949                 if (!SendMail())
950                         Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert",
951                                         fCurrentDetector.Data()));
952
953         } else {
954                 Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. "
955                                 "Aborted before with %s. Retry number %d.", fCurrentDetector.Data(),
956                                 status->GetStatusName(), status->GetCount()));
957                 Bool_t increaseCount = kTRUE;
958                 if (status->GetStatus() == AliShuttleStatus::kDCSError || status->GetStatus() == AliShuttleStatus::kDCSStarted)
959                         increaseCount = kFALSE;
960                 UpdateShuttleStatus(AliShuttleStatus::kStarted, increaseCount);
961                 cont = kTRUE;
962         }
963
964         return cont;
965 }
966
967 //______________________________________________________________________________________________
968 Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
969 {
970         //
971         // Makes data retrieval for all detectors in the configuration.
972         // entry: Shuttle logbook entry, contains run paramenters and status of detectors
973         // (Unprocessed, Inactive, Failed or Done).
974         // Returns kFALSE in case of error occured and kTRUE otherwise
975         //
976
977         if (!entry) return kFALSE;
978
979         fLogbookEntry = entry;
980
981         AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^* \n",
982                                         GetCurrentRun()));
983
984         // create ML instance that monitors this run
985         fMonaLisa = new TMonaLisaWriter(Form("%d", GetCurrentRun()), "SHUTTLE", "aliendb1.cern.ch");
986         // disable monitoring of other parameters that come e.g. from TFile
987         gMonitoringWriter = 0;
988
989         // Send the information to ML
990         TMonaLisaText  mlStatus("SHUTTLE_status", "Processing");
991         TMonaLisaText  mlRunType("SHUTTLE_runtype", Form("%s (%s)", entry->GetRunType(), entry->GetRunParameter("log")));
992
993         TList mlList;
994         mlList.Add(&mlStatus);
995         mlList.Add(&mlRunType);
996
997         fMonaLisa->SendParameters(&mlList);
998
999         if (fLogbookEntry->IsDone())
1000         {
1001                 Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook");
1002                 UpdateShuttleLogbook("shuttle_done");
1003                 fLogbookEntry = 0;
1004                 return kTRUE;
1005         }
1006
1007         // read test mode if flag is set
1008         if (fReadTestMode)
1009         {
1010                 TString logEntry(entry->GetRunParameter("log"));
1011                 //printf("log entry = %s\n", logEntry.Data());
1012                 TString searchStr("Testmode: ");
1013                 Int_t pos = logEntry.Index(searchStr.Data());
1014                 //printf("%d\n", pos);
1015                 if (pos >= 0)
1016                 {
1017                         TSubString subStr = logEntry(pos + searchStr.Length(), logEntry.Length());
1018                         //printf("%s\n", subStr.String().Data());
1019                         TString newStr(subStr.Data());
1020                         TObjArray* token = newStr.Tokenize(' ');
1021                         if (token)
1022                         {
1023                                 //token->Print();
1024                                 TObjString* tmpStr = dynamic_cast<TObjString*> (token->First());
1025                                 if (tmpStr)
1026                                 {
1027                                         Int_t testMode = tmpStr->String().Atoi();
1028                                         if (testMode > 0)
1029                                         {
1030                                                 Log("SHUTTLE", Form("Enabling test mode %d", testMode));
1031                                                 SetTestMode((TestMode) testMode);
1032                                         }
1033                                 }
1034                                 delete token;          
1035                         }
1036                 }
1037         }
1038         
1039         fLogbookEntry->Print("all");
1040
1041         // Initialization
1042         Bool_t hasError = kFALSE;
1043
1044         AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
1045         if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
1046         AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
1047         if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
1048
1049         // Loop on detectors in the configuration
1050         TIter iter(fConfig->GetDetectors());
1051         TObjString* aDetector = 0;
1052
1053         while ((aDetector = (TObjString*) iter.Next()))
1054         {
1055                 fCurrentDetector = aDetector->String();
1056
1057                 if (ContinueProcessing() == kFALSE) continue;
1058
1059                 AliInfo(Form("\n\n \t\t\t****** run %d - %s: START  ******",
1060                                                 GetCurrentRun(), aDetector->GetName()));
1061
1062                 for(Int_t iSys=0;iSys<3;iSys++) fFXSCalled[iSys]=kFALSE;
1063
1064                 Log(fCurrentDetector.Data(), "Starting processing");
1065
1066                 Int_t pid = fork();
1067
1068                 if (pid < 0)
1069                 {
1070                         Log("SHUTTLE", "ERROR: Forking failed");
1071                 }
1072                 else if (pid > 0)
1073                 {
1074                         // parent
1075                         AliInfo(Form("In parent process of %d - %s: Starting monitoring",
1076                                                         GetCurrentRun(), aDetector->GetName()));
1077
1078                         Long_t begin = time(0);
1079
1080                         int status; // to be used with waitpid, on purpose an int (not Int_t)!
1081                         while (waitpid(pid, &status, WNOHANG) == 0)
1082                         {
1083                                 Long_t expiredTime = time(0) - begin;
1084
1085                                 if (expiredTime > fConfig->GetPPTimeOut())
1086                                 {
1087                                         TString tmp;
1088                                         tmp.Form("Process of %s time out. Run time: %d seconds. Killing...",
1089                                                                 fCurrentDetector.Data(), expiredTime);
1090                                         Log("SHUTTLE", tmp);
1091                                         Log(fCurrentDetector, tmp);
1092
1093                                         kill(pid, 9);
1094
1095                                         UpdateShuttleStatus(AliShuttleStatus::kPPTimeOut);
1096                                         hasError = kTRUE;
1097
1098                                         gSystem->Sleep(1000);
1099                                 }
1100                                 else
1101                                 {
1102                                         gSystem->Sleep(1000);
1103                                         
1104                                         TString checkStr;
1105                                         checkStr.Form("ps -o vsize --pid %d | tail -n 1", pid);
1106                                         FILE* pipe = gSystem->OpenPipe(checkStr, "r");
1107                                         if (!pipe)
1108                                         {
1109                                                 Log("SHUTTLE", Form("Error: Could not open pipe to %s", checkStr.Data()));
1110                                                 continue;
1111                                         }
1112                                                 
1113                                         char buffer[100];
1114                                         if (!fgets(buffer, 100, pipe))
1115                                         {
1116                                                 Log("SHUTTLE", "Error: ps did not return anything");
1117                                                 gSystem->ClosePipe(pipe);
1118                                                 continue;
1119                                         }
1120                                         gSystem->ClosePipe(pipe);
1121                                         
1122                                         //Log("SHUTTLE", Form("ps returned %s", buffer));
1123                                         
1124                                         Int_t mem = 0;
1125                                         if ((sscanf(buffer, "%d\n", &mem) != 1) || !mem)
1126                                         {
1127                                                 Log("SHUTTLE", "Error: Could not parse output of ps");
1128                                                 continue;
1129                                         }
1130                                         
1131                                         if (expiredTime % 60 == 0)
1132                                                 Log("SHUTTLE", Form("%s: Checking process. Run time: %d seconds - Memory consumption: %d KB",
1133                                                                 fCurrentDetector.Data(), expiredTime, mem));
1134                                         
1135                                         if (mem > fConfig->GetPPMaxMem())
1136                                         {
1137                                                 TString tmp;
1138                                                 tmp.Form("Process exceeds maximum allowed memory (%d KB > %d KB). Killing...",
1139                                                         mem, fConfig->GetPPMaxMem());
1140                                                 Log("SHUTTLE", tmp);
1141                                                 Log(fCurrentDetector, tmp);
1142         
1143                                                 kill(pid, 9);
1144         
1145                                                 UpdateShuttleStatus(AliShuttleStatus::kPPOutOfMemory);
1146                                                 hasError = kTRUE;
1147         
1148                                                 gSystem->Sleep(1000);
1149                                         }
1150                                 }
1151                         }
1152
1153                         AliInfo(Form("In parent process of %d - %s: Client has terminated.",
1154                                                                 GetCurrentRun(), aDetector->GetName()));
1155
1156                         if (WIFEXITED(status))
1157                         {
1158                                 Int_t returnCode = WEXITSTATUS(status);
1159
1160                                 Log("SHUTTLE", Form("%s: the return code is %d", fCurrentDetector.Data(),
1161                                                                                 returnCode));
1162
1163                                 if (returnCode == 0) hasError = kTRUE;
1164                         }
1165                 }
1166                 else if (pid == 0)
1167                 {
1168                         // client
1169                         AliInfo(Form("In client process of %d - %s", GetCurrentRun(), aDetector->GetName()));
1170
1171                         Bool_t success = ProcessCurrentDetector();
1172                         if (success) // Preprocessor finished successfully!
1173                         { 
1174                                 // Update time_processed field in FXS DB
1175                                 if (UpdateTable() == kFALSE)
1176                                         Log("SHUTTLE", Form("Process - %s: Could not update FXS databases!"));
1177
1178                                 // Transfer the data from local storage to main storage (Grid)
1179                                 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
1180                                 if (StoreOCDB() == kFALSE)
1181                                 {
1182                                         AliInfo(Form("\n \t\t\t****** run %d - %s: STORAGE ERROR ****** \n\n",
1183                                                         GetCurrentRun(), aDetector->GetName()));
1184                                         UpdateShuttleStatus(AliShuttleStatus::kStoreError);
1185                                         success = kFALSE;
1186                                 } else {
1187                                         AliInfo(Form("\n \t\t\t****** run %d - %s: DONE ****** \n\n",
1188                                                         GetCurrentRun(), aDetector->GetName()));
1189                                         UpdateShuttleStatus(AliShuttleStatus::kDone);
1190                                         UpdateShuttleLogbook(fCurrentDetector, "DONE");
1191                                 }
1192                         }
1193
1194                         for (UInt_t iSys=0; iSys<3; iSys++)
1195                         {
1196                                 if (fFXSCalled[iSys]) fFXSlist[iSys].Clear();
1197                         }
1198
1199                         AliInfo(Form("Client process of %d - %s is exiting now with %d.",
1200                                                         GetCurrentRun(), aDetector->GetName(), success));
1201
1202                         // the client exits here
1203                         gSystem->Exit(success);
1204
1205                         AliError("We should never get here!!!");
1206                 }
1207         }
1208
1209         AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^* \n",
1210                                                         GetCurrentRun()));
1211
1212         //check if shuttle is done for this run, if so update logbook
1213         TObjArray checkEntryArray;
1214         checkEntryArray.SetOwner(1);
1215         TString whereClause = Form("where run=%d", GetCurrentRun());
1216         if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || checkEntryArray.GetEntries() == 0) {
1217                 Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!",
1218                                                 GetCurrentRun()));
1219                 return hasError == kFALSE;
1220         }
1221
1222         AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*>
1223                                                 (checkEntryArray.At(0));
1224
1225         if (checkEntry)
1226         {
1227                 if (checkEntry->IsDone())
1228                 {
1229                         Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook");
1230                         UpdateShuttleLogbook("shuttle_done");
1231                 }
1232                 else
1233                 {
1234                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1235                         {
1236                                 if (checkEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
1237                                 {
1238                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
1239                                                         checkEntry->GetRun(), GetDetName(iDet)));
1240                                         fFirstUnprocessed[iDet] = kFALSE;
1241                                 }
1242                         }
1243                 }
1244         }
1245
1246         // remove ML instance
1247         delete fMonaLisa;
1248         fMonaLisa = 0;
1249
1250         fLogbookEntry = 0;
1251
1252         return hasError == kFALSE;
1253 }
1254
1255 //______________________________________________________________________________________________
1256 Bool_t AliShuttle::ProcessCurrentDetector()
1257 {
1258         //
1259         // Makes data retrieval just for a specific detector (fCurrentDetector).
1260         // Threre should be a configuration for this detector.
1261
1262         AliInfo(Form("Retrieving values for %s, run %d", fCurrentDetector.Data(), GetCurrentRun()));
1263
1264         TMap dcsMap;
1265         dcsMap.SetOwner(1);
1266
1267         Bool_t aDCSError = kFALSE;
1268
1269         // call preprocessor
1270         AliPreprocessor* aPreprocessor =
1271                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
1272
1273         aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
1274
1275         Bool_t processDCS = aPreprocessor->ProcessDCS();
1276
1277         if (!processDCS || fTestMode & kSkipDCS)
1278         {
1279                 AliInfo("In TESTMODE - Skipping DCS processing!");
1280         } 
1281         else if (fTestMode & kErrorDCS)
1282         {
1283                 AliInfo("In TESTMODE - Simulating DCS error");
1284                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1285                 return kFALSE;
1286         } else {
1287
1288                 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
1289
1290                 TString host(fConfig->GetDCSHost(fCurrentDetector));
1291                 Int_t port = fConfig->GetDCSPort(fCurrentDetector);
1292
1293                 // Retrieval of Aliases
1294                 TObjString* anAlias = 0;
1295                 Int_t iAlias = 1;
1296                 Int_t nTotAliases= ((TMap*)fConfig->GetDCSAliases(fCurrentDetector))->GetEntries();
1297                 TIter iterAliases(fConfig->GetDCSAliases(fCurrentDetector));
1298                 while ((anAlias = (TObjString*) iterAliases.Next()))
1299                 {
1300                         TObjArray *valueSet = new TObjArray();
1301                         valueSet->SetOwner(1);
1302
1303                         if (((iAlias-1) % 500) == 0 || iAlias == nTotAliases)
1304                                 AliInfo(Form("Querying DCS archive: alias %s (%d of %d)",
1305                                                 anAlias->GetName(), iAlias++, nTotAliases));
1306                         aDCSError = (GetValueSet(host, port, anAlias->String(), valueSet, kAlias) == 0);
1307
1308                         if(!aDCSError)
1309                         {
1310                                 dcsMap.Add(anAlias->Clone(), valueSet);
1311                         } else {
1312                                 Log(fCurrentDetector,
1313                                         Form("ProcessCurrentDetector - Error while retrieving alias %s",
1314                                                 anAlias->GetName()));
1315                                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1316                                 dcsMap.DeleteAll();
1317                                 return kFALSE;
1318                         }
1319                 }
1320
1321                 // Retrieval of Data Points
1322                 TObjString* aDP = 0;
1323                 Int_t iDP = 0;
1324                 Int_t nTotDPs= ((TMap*)fConfig->GetDCSDataPoints(fCurrentDetector))->GetEntries();
1325                 TIter iterDP(fConfig->GetDCSDataPoints(fCurrentDetector));
1326                 while ((aDP = (TObjString*) iterDP.Next()))
1327                 {
1328                         TObjArray *valueSet = new TObjArray();
1329                         valueSet->SetOwner(1);
1330                         if (((iDP-1) % 500) == 0 || iDP == nTotDPs)
1331                                 AliInfo(Form("Querying DCS archive: DP %s (%d of %d)",
1332                                                 aDP->GetName(), iDP++, nTotDPs));
1333                         aDCSError = (GetValueSet(host, port, aDP->String(), valueSet, kDP) == 0);
1334
1335                         if(!aDCSError)
1336                         {
1337                                 dcsMap.Add(aDP->Clone(), valueSet);
1338                         } else {
1339                                 Log(fCurrentDetector,
1340                                         Form("ProcessCurrentDetector - Error while retrieving data point %s",
1341                                                 aDP->GetName()));
1342                                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1343                                 dcsMap.DeleteAll();
1344                                 return kFALSE;
1345                         }
1346                 }
1347         }
1348
1349         // DCS Archive DB processing successful. Call Preprocessor!
1350         UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
1351
1352         UInt_t returnValue = aPreprocessor->Process(&dcsMap);
1353
1354         if (returnValue > 0) // Preprocessor error!
1355         {
1356                 Log(fCurrentDetector, Form("Preprocessor failed. Process returned %d.", returnValue));
1357                 UpdateShuttleStatus(AliShuttleStatus::kPPError);
1358                 dcsMap.DeleteAll();
1359                 return kFALSE;
1360         }
1361         
1362         // preprocessor ok!
1363         UpdateShuttleStatus(AliShuttleStatus::kPPDone);
1364         Log(fCurrentDetector, Form("ProcessCurrentDetector - %s preprocessor returned success",
1365                                 fCurrentDetector.Data()));
1366
1367         dcsMap.DeleteAll();
1368
1369         return kTRUE;
1370 }
1371
1372 //______________________________________________________________________________________________
1373 Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
1374                 TObjArray& entries)
1375 {
1376         // Query DAQ's Shuttle logbook and fills detector status object.
1377         // Call QueryRunParameters to query DAQ logbook for run parameters.
1378         //
1379
1380         entries.SetOwner(1);
1381
1382         // check connection, in case connect
1383         if(!Connect(3)) return kFALSE;
1384
1385         TString sqlQuery;
1386         sqlQuery = Form("select * from %s %s order by run", fConfig->GetShuttlelbTable(), whereClause);
1387
1388         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
1389         if (!aResult) {
1390                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
1391                 return kFALSE;
1392         }
1393
1394         AliDebug(2,Form("Query = %s", sqlQuery.Data()));
1395
1396         if(aResult->GetRowCount() == 0) {
1397                 AliInfo("No entries in Shuttle Logbook match request");
1398                 delete aResult;
1399                 return kTRUE;
1400         }
1401
1402         // TODO Check field count!
1403         const UInt_t nCols = 22;
1404         if (aResult->GetFieldCount() != (Int_t) nCols) {
1405                 AliError("Invalid SQL result field number!");
1406                 delete aResult;
1407                 return kFALSE;
1408         }
1409
1410         TSQLRow* aRow;
1411         while ((aRow = aResult->Next())) {
1412                 TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
1413                 Int_t run = runString.Atoi();
1414
1415                 AliShuttleLogbookEntry *entry = QueryRunParameters(run);
1416                 if (!entry)
1417                         continue;
1418
1419                 // loop on detectors
1420                 for(UInt_t ii = 0; ii < nCols; ii++)
1421                         entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii));
1422
1423                 entries.AddLast(entry);
1424                 delete aRow;
1425         }
1426
1427         delete aResult;
1428         return kTRUE;
1429 }
1430
1431 //______________________________________________________________________________________________
1432 AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
1433 {
1434         //
1435         // Retrieve run parameters written in the DAQ logbook and sets them into AliShuttleLogbookEntry object
1436         //
1437
1438         // check connection, in case connect
1439         if (!Connect(3))
1440                 return 0;
1441
1442         TString sqlQuery;
1443         sqlQuery.Form("select * from %s where run=%d", fConfig->GetDAQlbTable(), run);
1444
1445         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
1446         if (!aResult) {
1447                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
1448                 return 0;
1449         }
1450
1451         if (aResult->GetRowCount() == 0) {
1452                 Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run));
1453                 delete aResult;
1454                 return 0;
1455         }
1456
1457         if (aResult->GetRowCount() > 1) {
1458                 AliError(Form("More than one entry in DAQ Logbook for run %d. Skipping", run));
1459                 delete aResult;
1460                 return 0;
1461         }
1462
1463         TSQLRow* aRow = aResult->Next();
1464         if (!aRow)
1465         {
1466                 AliError(Form("Could not retrieve row for run %d. Skipping", run));
1467                 delete aResult;
1468                 return 0;
1469         }
1470
1471         AliShuttleLogbookEntry* entry = new AliShuttleLogbookEntry(run);
1472
1473         for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
1474                 entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
1475
1476         UInt_t startTime = entry->GetStartTime();
1477         UInt_t endTime = entry->GetEndTime();
1478
1479         if (!startTime || !endTime || startTime > endTime) {
1480                 Log("SHUTTLE",
1481                         Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d",
1482                                 run, startTime, endTime));
1483                 delete entry;
1484                 delete aRow;
1485                 delete aResult;
1486                 return 0;
1487         }
1488
1489         delete aRow;
1490         delete aResult;
1491
1492         return entry;
1493 }
1494
1495 //______________________________________________________________________________________________
1496 Bool_t AliShuttle::GetValueSet(const char* host, Int_t port, const char* entry,
1497                                 TObjArray* valueSet, DCSType type)
1498 {
1499         // Retrieve all "entry" data points from the DCS server
1500         // host, port: TSocket connection parameters
1501         // entry: name of the alias or data point
1502         // valueSet: array of retrieved AliDCSValue's
1503         // type: kAlias or kDP
1504
1505         AliDCSClient client(host, port, fTimeout, fRetries);
1506         if (!client.IsConnected())
1507         {
1508                 return kFALSE;
1509         }
1510
1511         Int_t result=0;
1512
1513         if (type == kAlias)
1514         {
1515                 result = client.GetAliasValues(entry,
1516                         GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1517         } else
1518         if (type == kDP)
1519         {
1520                 result = client.GetDPValues(entry,
1521                         GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1522         }
1523
1524         if (result < 0)
1525         {
1526                 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get '%s'! Reason: %s",
1527                         entry, AliDCSClient::GetErrorString(result)));
1528
1529                 if (result == AliDCSClient::fgkServerError)
1530                 {
1531                         Log(fCurrentDetector.Data(), Form("GetValueSet - Server error: %s",
1532                                 client.GetServerError().Data()));
1533                 }
1534
1535                 return kFALSE;
1536         }
1537
1538         return kTRUE;
1539 }
1540
1541 //______________________________________________________________________________________________
1542 const char* AliShuttle::GetFile(Int_t system, const char* detector,
1543                 const char* id, const char* source)
1544 {
1545         // Get calibration file from file exchange servers
1546         // First queris the FXS database for the file name, using the run, detector, id and source info
1547         // then calls RetrieveFile(filename) for actual copy to local disk
1548         // run: current run being processed (given by Logbook entry fLogbookEntry)
1549         // detector: the Preprocessor name
1550         // id: provided as a parameter by the Preprocessor
1551         // source: provided by the Preprocessor through GetFileSources function
1552
1553         // check if test mode should simulate a FXS error
1554         if (fTestMode & kErrorFXSFiles)
1555         {
1556                 Log(detector, Form("GetFile - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
1557                 return 0;
1558         }
1559         
1560         // check connection, in case connect
1561         if (!Connect(system))
1562         {
1563                 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
1564                 return 0;
1565         }
1566
1567         // Query preparation
1568         TString sourceName(source);
1569         Int_t nFields = 3;
1570         TString sqlQueryStart = Form("select filePath,size,fileChecksum from %s where",
1571                                                                 fConfig->GetFXSdbTable(system));
1572         TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1573                                                                 GetCurrentRun(), detector, id);
1574
1575         if (system == kDAQ)
1576         {
1577                 whereClause += Form(" and DAQsource=\"%s\"", source);
1578         }
1579         else if (system == kDCS)
1580         {
1581                 sourceName="none";
1582         }
1583         else if (system == kHLT)
1584         {
1585                 whereClause += Form(" and DDLnumbers=\"%s\"", source);
1586                 nFields = 3;
1587         }
1588
1589         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1590
1591         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1592
1593         // Query execution
1594         TSQLResult* aResult = 0;
1595         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
1596         if (!aResult) {
1597                 Log(detector, Form("GetFileName - Can't execute SQL query to %s database for: id = %s, source = %s",
1598                                 GetSystemName(system), id, sourceName.Data()));
1599                 return 0;
1600         }
1601
1602         if(aResult->GetRowCount() == 0)
1603         {
1604                 Log(detector,
1605                         Form("GetFileName - No entry in %s FXS db for: id = %s, source = %s",
1606                                 GetSystemName(system), id, sourceName.Data()));
1607                 delete aResult;
1608                 return 0;
1609         }
1610
1611         if (aResult->GetRowCount() > 1) {
1612                 Log(detector,
1613                         Form("GetFileName - More than one entry in %s FXS db for: id = %s, source = %s",
1614                                 GetSystemName(system), id, sourceName.Data()));
1615                 delete aResult;
1616                 return 0;
1617         }
1618
1619         if (aResult->GetFieldCount() != nFields) {
1620                 Log(detector,
1621                         Form("GetFileName - Wrong field count in %s FXS db for: id = %s, source = %s",
1622                                 GetSystemName(system), id, sourceName.Data()));
1623                 delete aResult;
1624                 return 0;
1625         }
1626
1627         TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
1628
1629         if (!aRow){
1630                 Log(detector, Form("GetFileName - Empty set result in %s FXS db from query: id = %s, source = %s",
1631                                 GetSystemName(system), id, sourceName.Data()));
1632                 delete aResult;
1633                 return 0;
1634         }
1635
1636         TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
1637         TString fileSize(aRow->GetField(1), aRow->GetFieldLength(1));
1638         TString fileChecksum(aRow->GetField(2), aRow->GetFieldLength(2));
1639
1640         delete aResult;
1641         delete aRow;
1642
1643         AliDebug(2, Form("filePath = %s; size = %s, fileChecksum = %s",
1644                                 filePath.Data(), fileSize.Data(), fileChecksum.Data()));
1645
1646         // retrieved file is renamed to make it unique
1647         TString localFileName = Form("%s_%s_%d_%s_%s.shuttle",
1648                                         GetSystemName(system), detector, GetCurrentRun(), id, sourceName.Data());
1649
1650
1651         // file retrieval from FXS
1652         UInt_t nRetries = 0;
1653         UInt_t maxRetries = 3;
1654         Bool_t result = kFALSE;
1655
1656         // copy!! if successful TSystem::Exec returns 0
1657         while(nRetries++ < maxRetries) {
1658                 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
1659                 result = RetrieveFile(system, filePath.Data(), localFileName.Data());
1660                 if(!result)
1661                 {
1662                         Log(detector, Form("GetFileName - Copy of file %s from %s FXS failed",
1663                                         filePath.Data(), GetSystemName(system)));
1664                         continue;
1665                 } else {
1666                         AliInfo(Form("File %s copied from %s FXS into %s/%s",
1667                                                 filePath.Data(), GetSystemName(system),
1668                                                 GetShuttleTempDir(), localFileName.Data()));
1669                 }
1670
1671                 if (fileChecksum.Length()>0)
1672                 {
1673                         // compare md5sum of local file with the one stored in the FXS DB
1674                         Int_t md5Comp = gSystem->Exec(Form("md5sum %s/%s |grep %s 2>&1 > /dev/null",
1675                                                 GetShuttleTempDir(), localFileName.Data(), fileChecksum.Data()));
1676
1677                         if (md5Comp != 0)
1678                         {
1679                                 Log(detector, Form("GetFileName - md5sum of file %s does not match with local copy!",
1680                                                         filePath.Data()));
1681                                 result = kFALSE;
1682                                 continue;
1683                         }
1684                 } else {
1685                         Log(fCurrentDetector, Form("GetFile - md5sum of file %s not set in %s database, skipping comparison",
1686                                                         filePath.Data(), GetSystemName(system)));
1687                 }
1688                 if (result) break;
1689         }
1690
1691         if(!result) return 0;
1692
1693         fFXSCalled[system]=kTRUE;
1694         TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, sourceName.Data()));
1695         fFXSlist[system].Add(fileParams);
1696
1697         static TString fullLocalFileName;
1698         fullLocalFileName = TString::Format("%s/%s", GetShuttleTempDir(), localFileName.Data());
1699
1700         AliInfo(Form("fullLocalFileName = %s", fullLocalFileName.Data()));
1701
1702         return fullLocalFileName.Data();
1703
1704 }
1705
1706 //______________________________________________________________________________________________
1707 Bool_t AliShuttle::RetrieveFile(UInt_t system, const char* fxsFileName, const char* localFileName)
1708 {
1709         //
1710         // Copies file from FXS to local Shuttle machine
1711         //
1712
1713         // check temp directory: trying to cd to temp; if it does not exist, create it
1714         AliDebug(2, Form("Copy file %s from %s FXS into %s/%s",
1715                         GetSystemName(system), fxsFileName, GetShuttleTempDir(), localFileName));
1716
1717         void* dir = gSystem->OpenDirectory(GetShuttleTempDir());
1718         if (dir == NULL) {
1719                 if (gSystem->mkdir(GetShuttleTempDir(), kTRUE)) {
1720                         AliError(Form("Can't open directory <%s>", GetShuttleTempDir()));
1721                         return kFALSE;
1722                 }
1723
1724         } else {
1725                 gSystem->FreeDirectory(dir);
1726         }
1727
1728         TString baseFXSFolder;
1729         if (system == kDAQ)
1730         {
1731                 baseFXSFolder = "FES/";
1732         }
1733         else if (system == kDCS)
1734         {
1735                 baseFXSFolder = "";
1736         }
1737         else if (system == kHLT)
1738         {
1739                 baseFXSFolder = "~/";
1740         }
1741
1742
1743         TString command = Form("scp -oPort=%d -2 %s@%s:%s%s %s/%s",
1744                 fConfig->GetFXSPort(system),
1745                 fConfig->GetFXSUser(system),
1746                 fConfig->GetFXSHost(system),
1747                 baseFXSFolder.Data(),
1748                 fxsFileName,
1749                 GetShuttleTempDir(),
1750                 localFileName);
1751
1752         AliDebug(2, Form("%s",command.Data()));
1753
1754         Bool_t result = (gSystem->Exec(command.Data()) == 0);
1755
1756         return result;
1757 }
1758
1759 //______________________________________________________________________________________________
1760 TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
1761 {
1762         //
1763         // Get sources producing the condition file Id from file exchange servers
1764         //
1765         
1766         // check if test mode should simulate a FXS error
1767         if (fTestMode & kErrorFXSSources)
1768         {
1769                 Log(detector, Form("GetFileSources - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
1770                 return 0;
1771         }
1772
1773
1774         if (system == kDCS)
1775         {
1776                 AliError("DCS system has only one source of data!");
1777                 return NULL;
1778         }
1779
1780         // check connection, in case connect
1781         if (!Connect(system))
1782         {
1783                 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
1784                 return NULL;
1785         }
1786
1787         TString sourceName = 0;
1788         if (system == kDAQ)
1789         {
1790                 sourceName = "DAQsource";
1791         } else if (system == kHLT)
1792         {
1793                 sourceName = "DDLnumbers";
1794         }
1795
1796         TString sqlQueryStart = Form("select %s from %s where", sourceName.Data(), fConfig->GetFXSdbTable(system));
1797         TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1798                                 GetCurrentRun(), detector, id);
1799         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1800
1801         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1802
1803         // Query execution
1804         TSQLResult* aResult;
1805         aResult = fServer[system]->Query(sqlQuery);
1806         if (!aResult) {
1807                 Log(detector, Form("GetFileSources - Can't execute SQL query to %s database for id: %s",
1808                                 GetSystemName(system), id));
1809                 return 0;
1810         }
1811
1812         if (aResult->GetRowCount() == 0)
1813         {
1814                 Log(detector,
1815                         Form("GetFileSources - No entry in %s FXS table for id: %s", GetSystemName(system), id));
1816                 delete aResult;
1817                 return 0;
1818         }
1819
1820         TSQLRow* aRow;
1821         TList *list = new TList();
1822         list->SetOwner(1);
1823
1824         while ((aRow = aResult->Next()))
1825         {
1826
1827                 TString source(aRow->GetField(0), aRow->GetFieldLength(0));
1828                 AliDebug(2, Form("%s = %s", sourceName.Data(), source.Data()));
1829                 list->Add(new TObjString(source));
1830                 delete aRow;
1831         }
1832
1833         delete aResult;
1834
1835         return list;
1836 }
1837
1838 //______________________________________________________________________________________________
1839 Bool_t AliShuttle::Connect(Int_t system)
1840 {
1841         // Connect to MySQL Server of the system's FXS MySQL databases
1842         // DAQ Logbook, Shuttle Logbook and DAQ FXS db are on the same host
1843         //
1844
1845         // check connection: if already connected return
1846         if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
1847
1848         TString dbHost, dbUser, dbPass, dbName;
1849
1850         if (system < 3) // FXS db servers
1851         {
1852                 dbHost = Form("mysql://%s:%d", fConfig->GetFXSdbHost(system), fConfig->GetFXSdbPort(system));
1853                 dbUser = fConfig->GetFXSdbUser(system);
1854                 dbPass = fConfig->GetFXSdbPass(system);
1855                 dbName =   fConfig->GetFXSdbName(system);
1856         } else { // Run & Shuttle logbook servers
1857         // TODO Will the Shuttle logbook server be the same as the Run logbook server ???
1858                 dbHost = Form("mysql://%s:%d", fConfig->GetDAQlbHost(), fConfig->GetDAQlbPort());
1859                 dbUser = fConfig->GetDAQlbUser();
1860                 dbPass = fConfig->GetDAQlbPass();
1861                 dbName =   fConfig->GetDAQlbDB();
1862         }
1863
1864         fServer[system] = TSQLServer::Connect(dbHost.Data(), dbUser.Data(), dbPass.Data());
1865         if (!fServer[system] || !fServer[system]->IsConnected()) {
1866                 if(system < 3)
1867                 {
1868                 AliError(Form("Can't establish connection to FXS database for %s",
1869                                         AliShuttleInterface::GetSystemName(system)));
1870                 } else {
1871                 AliError("Can't establish connection to Run logbook.");
1872                 }
1873                 if(fServer[system]) delete fServer[system];
1874                 return kFALSE;
1875         }
1876
1877         // Get tables
1878         TSQLResult* aResult=0;
1879         switch(system){
1880                 case kDAQ:
1881                         aResult = fServer[kDAQ]->GetTables(dbName.Data());
1882                         break;
1883                 case kDCS:
1884                         aResult = fServer[kDCS]->GetTables(dbName.Data());
1885                         break;
1886                 case kHLT:
1887                         aResult = fServer[kHLT]->GetTables(dbName.Data());
1888                         break;
1889                 default:
1890                         aResult = fServer[3]->GetTables(dbName.Data());
1891                         break;
1892         }
1893
1894         delete aResult;
1895         return kTRUE;
1896 }
1897
1898 //______________________________________________________________________________________________
1899 Bool_t AliShuttle::UpdateTable()
1900 {
1901         //
1902         // Update FXS table filling time_processed field in all rows corresponding to current run and detector
1903         //
1904
1905         Bool_t result = kTRUE;
1906
1907         for (UInt_t system=0; system<3; system++)
1908         {
1909                 if(!fFXSCalled[system]) continue;
1910
1911                 // check connection, in case connect
1912                 if (!Connect(system))
1913                 {
1914                         Log(fCurrentDetector, Form("UpdateTable - Couldn't connect to %s FXS database", GetSystemName(system)));
1915                         result = kFALSE;
1916                         continue;
1917                 }
1918
1919                 TTimeStamp now; // now
1920
1921                 // Loop on FXS list entries
1922                 TIter iter(&fFXSlist[system]);
1923                 TObjString *aFXSentry=0;
1924                 while ((aFXSentry = dynamic_cast<TObjString*> (iter.Next())))
1925                 {
1926                         TString aFXSentrystr = aFXSentry->String();
1927                         TObjArray *aFXSarray = aFXSentrystr.Tokenize("#!?!#");
1928                         if (!aFXSarray || aFXSarray->GetEntries() != 2 )
1929                         {
1930                                 Log(fCurrentDetector, Form("UpdateTable - error updating %s FXS entry. Check string: <%s>",
1931                                         GetSystemName(system), aFXSentrystr.Data()));
1932                                 if(aFXSarray) delete aFXSarray;
1933                                 result = kFALSE;
1934                                 continue;
1935                         }
1936                         const char* fileId = ((TObjString*) aFXSarray->At(0))->GetName();
1937                         const char* source = ((TObjString*) aFXSarray->At(1))->GetName();
1938
1939                         TString whereClause;
1940                         if (system == kDAQ)
1941                         {
1942                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
1943                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
1944                         }
1945                         else if (system == kDCS)
1946                         {
1947                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\";",
1948                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId);
1949                         }
1950                         else if (system == kHLT)
1951                         {
1952                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\";",
1953                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
1954                         }
1955
1956                         delete aFXSarray;
1957
1958                         TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
1959                                                                 now.GetSec(), whereClause.Data());
1960
1961                         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1962
1963                         // Query execution
1964                         TSQLResult* aResult;
1965                         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
1966                         if (!aResult)
1967                         {
1968                                 Log(fCurrentDetector, Form("UpdateTable - %s db: can't execute SQL query <%s>",
1969                                                                 GetSystemName(system), sqlQuery.Data()));
1970                                 result = kFALSE;
1971                                 continue;
1972                         }
1973                         delete aResult;
1974                 }
1975         }
1976
1977         return result;
1978 }
1979
1980 //______________________________________________________________________________________________
1981 Bool_t AliShuttle::UpdateTableFailCase()
1982 {
1983         // Update FXS table filling time_processed field in all rows corresponding to current run and detector
1984         // this is called in case the preprocessor is declared failed for the current run, because
1985         // the fields are updated only in case of success
1986
1987         Bool_t result = kTRUE;
1988
1989         for (UInt_t system=0; system<3; system++)
1990         {
1991                 // check connection, in case connect
1992                 if (!Connect(system))
1993                 {
1994                         Log(fCurrentDetector, Form("UpdateTableFailCase - Couldn't connect to %s FXS database",
1995                                                         GetSystemName(system)));
1996                         result = kFALSE;
1997                         continue;
1998                 }
1999
2000                 TTimeStamp now; // now
2001
2002                 // Loop on FXS list entries
2003
2004                 TString whereClause = Form("where run=%d and detector=\"%s\";",
2005                                                 GetCurrentRun(), fCurrentDetector.Data());
2006
2007
2008                 TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
2009                                                         now.GetSec(), whereClause.Data());
2010
2011                 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2012
2013                 // Query execution
2014                 TSQLResult* aResult;
2015                 aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2016                 if (!aResult)
2017                 {
2018                         Log(fCurrentDetector, Form("UpdateTableFailCase - %s db: can't execute SQL query <%s>",
2019                                                         GetSystemName(system), sqlQuery.Data()));
2020                         result = kFALSE;
2021                         continue;
2022                 }
2023                 delete aResult;
2024         }
2025
2026         return result;
2027 }
2028
2029 //______________________________________________________________________________________________
2030 Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
2031 {
2032         //
2033         // Update Shuttle logbook filling detector or shuttle_done column
2034         // ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
2035         //
2036
2037         // check connection, in case connect
2038         if(!Connect(3)){
2039                 Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook.");
2040                 return kFALSE;
2041         }
2042
2043         TString detName(detector);
2044         TString setClause;
2045         if(detName == "shuttle_done")
2046         {
2047                 setClause = "set shuttle_done=1";
2048
2049                 // Send the information to ML
2050                 TMonaLisaText  mlStatus("SHUTTLE_status", "Done");
2051
2052                 TList mlList;
2053                 mlList.Add(&mlStatus);
2054
2055                 fMonaLisa->SendParameters(&mlList);
2056         } else {
2057                 TString statusStr(status);
2058                 if(statusStr.Contains("done", TString::kIgnoreCase) ||
2059                    statusStr.Contains("failed", TString::kIgnoreCase)){
2060                         setClause = Form("set %s=\"%s\"", detector, status);
2061                 } else {
2062                         Log("SHUTTLE",
2063                                 Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s",
2064                                         status, detector));
2065                         return kFALSE;
2066                 }
2067         }
2068
2069         TString whereClause = Form("where run=%d", GetCurrentRun());
2070
2071         TString sqlQuery = Form("update %s %s %s",
2072                                         fConfig->GetShuttlelbTable(), setClause.Data(), whereClause.Data());
2073
2074         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2075
2076         // Query execution
2077         TSQLResult* aResult;
2078         aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
2079         if (!aResult) {
2080                 Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data()));
2081                 return kFALSE;
2082         }
2083         delete aResult;
2084
2085         return kTRUE;
2086 }
2087
2088 //______________________________________________________________________________________________
2089 Int_t AliShuttle::GetCurrentRun() const
2090 {
2091         //
2092         // Get current run from logbook entry
2093         //
2094
2095         return fLogbookEntry ? fLogbookEntry->GetRun() : -1;
2096 }
2097
2098 //______________________________________________________________________________________________
2099 UInt_t AliShuttle::GetCurrentStartTime() const
2100 {
2101         //
2102         // get current start time
2103         //
2104
2105         return fLogbookEntry ? fLogbookEntry->GetStartTime() : 0;
2106 }
2107
2108 //______________________________________________________________________________________________
2109 UInt_t AliShuttle::GetCurrentEndTime() const
2110 {
2111         //
2112         // get current end time from logbook entry
2113         //
2114
2115         return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
2116 }
2117
2118 //______________________________________________________________________________________________
2119 void AliShuttle::Log(const char* detector, const char* message)
2120 {
2121         //
2122         // Fill log string with a message
2123         //
2124
2125         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
2126         if (dir == NULL) {
2127                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE)) {
2128                         AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
2129                         return;
2130                 }
2131
2132         } else {
2133                 gSystem->FreeDirectory(dir);
2134         }
2135
2136         TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
2137         if (GetCurrentRun() >= 0) 
2138                 toLog += Form("run %d - ", GetCurrentRun());
2139         toLog += Form("%s", message);
2140
2141         AliInfo(toLog.Data());
2142
2143         TString fileName;
2144         if (GetCurrentRun() >= 0) 
2145                 fileName.Form("%s/%s_%d.log", GetShuttleLogDir(), detector, GetCurrentRun());
2146         else
2147                 fileName.Form("%s/%s.log", GetShuttleLogDir(), detector);
2148         
2149         gSystem->ExpandPathName(fileName);
2150
2151         ofstream logFile;
2152         logFile.open(fileName, ofstream::out | ofstream::app);
2153
2154         if (!logFile.is_open()) {
2155                 AliError(Form("Could not open file %s", fileName.Data()));
2156                 return;
2157         }
2158
2159         logFile << toLog.Data() << "\n";
2160
2161         logFile.close();
2162 }
2163
2164 //______________________________________________________________________________________________
2165 Bool_t AliShuttle::Collect(Int_t run)
2166 {
2167         //
2168         // Collects conditions data for all UNPROCESSED run written to DAQ LogBook in case of run = -1 (default)
2169         // If a dedicated run is given this run is processed
2170         //
2171         // In operational mode, this is the Shuttle function triggered by the EOR signal.
2172         //
2173
2174         if (run == -1)
2175                 Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs");
2176         else
2177                 Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run));
2178
2179         SetLastAction("Starting");
2180
2181         TString whereClause("where shuttle_done=0");
2182         if (run != -1)
2183                 whereClause += Form(" and run=%d", run);
2184
2185         TObjArray shuttleLogbookEntries;
2186         if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries))
2187         {
2188                 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
2189                 return kFALSE;
2190         }
2191
2192         if (shuttleLogbookEntries.GetEntries() == 0)
2193         {
2194                 if (run == -1)
2195                         Log("SHUTTLE","Collect - Found no UNPROCESSED runs in Shuttle logbook");
2196                 else
2197                         Log("SHUTTLE", Form("Collect - Run %d is already DONE "
2198                                                 "or it does not exist in Shuttle logbook", run));
2199                 return kTRUE;
2200         }
2201
2202         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
2203                 fFirstUnprocessed[iDet] = kTRUE;
2204
2205         if (run != -1)
2206         {
2207                 // query Shuttle logbook for earlier runs, check if some detectors are unprocessed,
2208                 // flag them into fFirstUnprocessed array
2209                 TString whereClause(Form("where shuttle_done=0 and run < %d", run));
2210                 TObjArray tmpLogbookEntries;
2211                 if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries))
2212                 {
2213                         Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
2214                         return kFALSE;
2215                 }
2216
2217                 TIter iter(&tmpLogbookEntries);
2218                 AliShuttleLogbookEntry* anEntry = 0;
2219                 while ((anEntry = dynamic_cast<AliShuttleLogbookEntry*> (iter.Next())))
2220                 {
2221                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
2222                         {
2223                                 if (anEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
2224                                 {
2225                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
2226                                                         anEntry->GetRun(), GetDetName(iDet)));
2227                                         fFirstUnprocessed[iDet] = kFALSE;
2228                                 }
2229                         }
2230
2231                 }
2232
2233         }
2234
2235         if (!RetrieveConditionsData(shuttleLogbookEntries))
2236         {
2237                 Log("SHUTTLE", "Collect - Process of at least one run failed");
2238                 return kFALSE;
2239         }
2240
2241         Log("SHUTTLE", "Collect - Requested run(s) successfully processed");
2242         return kTRUE;
2243 }
2244
2245 //______________________________________________________________________________________________
2246 Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries)
2247 {
2248         //
2249         // Retrieve conditions data for all runs that aren't processed yet
2250         //
2251
2252         Bool_t hasError = kFALSE;
2253
2254         TIter iter(&dateEntries);
2255         AliShuttleLogbookEntry* anEntry;
2256
2257         while ((anEntry = (AliShuttleLogbookEntry*) iter.Next())){
2258                 if (!Process(anEntry)){
2259                         hasError = kTRUE;
2260                 }
2261
2262                 // clean SHUTTLE temp directory
2263                 TString filename = Form("%s/*.shuttle", GetShuttleTempDir());
2264                 RemoveFile(filename.Data());
2265         }
2266
2267         return hasError == kFALSE;
2268 }
2269
2270 //______________________________________________________________________________________________
2271 ULong_t AliShuttle::GetTimeOfLastAction() const
2272 {
2273         //
2274         // Gets time of last action
2275         //
2276
2277         ULong_t tmp;
2278
2279         fMonitoringMutex->Lock();
2280
2281         tmp = fLastActionTime;
2282
2283         fMonitoringMutex->UnLock();
2284
2285         return tmp;
2286 }
2287
2288 //______________________________________________________________________________________________
2289 const TString AliShuttle::GetLastAction() const
2290 {
2291         //
2292         // returns a string description of the last action
2293         //
2294
2295         TString tmp;
2296
2297         fMonitoringMutex->Lock();
2298         
2299         tmp = fLastAction;
2300         
2301         fMonitoringMutex->UnLock();
2302
2303         return tmp;
2304 }
2305
2306 //______________________________________________________________________________________________
2307 void AliShuttle::SetLastAction(const char* action)
2308 {
2309         //
2310         // updates the monitoring variables
2311         //
2312
2313         fMonitoringMutex->Lock();
2314
2315         fLastAction = action;
2316         fLastActionTime = time(0);
2317         
2318         fMonitoringMutex->UnLock();
2319 }
2320
2321 //______________________________________________________________________________________________
2322 const char* AliShuttle::GetRunParameter(const char* param)
2323 {
2324         //
2325         // returns run parameter read from DAQ logbook
2326         //
2327
2328         if(!fLogbookEntry) {
2329                 AliError("No logbook entry!");
2330                 return 0;
2331         }
2332
2333         return fLogbookEntry->GetRunParameter(param);
2334 }
2335
2336 //______________________________________________________________________________________________
2337 AliCDBEntry* AliShuttle::GetFromOCDB(const char* detector, const AliCDBPath& path)
2338 {
2339         //
2340         // returns object from OCDB valid for current run
2341         //
2342
2343         if (fTestMode & kErrorOCDB)
2344         {
2345                 Log(detector, "GetFromOCDB - In TESTMODE - Simulating error with OCDB");
2346                 return 0;
2347         }
2348         
2349         AliCDBStorage *sto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
2350         if (!sto)
2351         {
2352                 Log(detector, "GetFromOCDB - Cannot activate main OCDB for query!");
2353                 return 0;
2354         }
2355
2356         return dynamic_cast<AliCDBEntry*> (sto->Get(path, GetCurrentRun()));
2357 }
2358
2359 //______________________________________________________________________________________________
2360 Bool_t AliShuttle::SendMail()
2361 {
2362         //
2363         // sends a mail to the subdetector expert in case of preprocessor error
2364         //
2365         
2366         if (fTestMode != kNone)
2367                 return kTRUE;
2368
2369         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
2370         if (dir == NULL)
2371         {
2372                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
2373                 {
2374                         AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
2375                         return kFALSE;
2376                 }
2377
2378         } else {
2379                 gSystem->FreeDirectory(dir);
2380         }
2381
2382         TString bodyFileName;
2383         bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
2384         gSystem->ExpandPathName(bodyFileName);
2385
2386         ofstream mailBody;
2387         mailBody.open(bodyFileName, ofstream::out);
2388
2389         if (!mailBody.is_open())
2390         {
2391                 AliError(Form("Could not open mail body file %s", bodyFileName.Data()));
2392                 return kFALSE;
2393         }
2394
2395         TString to="";
2396         TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
2397         TObjString *anExpert=0;
2398         while ((anExpert = (TObjString*) iterExperts.Next()))
2399         {
2400                 to += Form("%s,", anExpert->GetName());
2401         }
2402         to.Remove(to.Length()-1);
2403         AliDebug(2, Form("to: %s",to.Data()));
2404
2405         // TODO this will be removed...
2406         if (to.Contains("not_yet_set")) {
2407                 AliInfo("List of detector responsibles not yet set!");
2408                 return kFALSE;
2409         }
2410
2411         TString cc="alberto.colla@cern.ch";
2412
2413         TString subject = Form("%s Shuttle preprocessor error in run %d !",
2414                                 fCurrentDetector.Data(), GetCurrentRun());
2415         AliDebug(2, Form("subject: %s", subject.Data()));
2416
2417         TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
2418         body += Form("SHUTTLE just detected that your preprocessor "
2419                         "exited with ERROR state in run %d!!\n\n", GetCurrentRun());
2420         body += Form("Please check %s status on the web page asap!\n\n", fCurrentDetector.Data());
2421         body += Form("The last 10 lines of %s log file are following:\n\n");
2422
2423         AliDebug(2, Form("Body begin: %s", body.Data()));
2424
2425         mailBody << body.Data();
2426         mailBody.close();
2427         mailBody.open(bodyFileName, ofstream::out | ofstream::app);
2428
2429         TString logFileName = Form("%s/%s_%d.log", GetShuttleLogDir(), fCurrentDetector.Data(), GetCurrentRun());
2430         TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
2431         if (gSystem->Exec(tailCommand.Data()))
2432         {
2433                 mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
2434         }
2435
2436         TString endBody = Form("------------------------------------------------------\n\n");
2437         endBody += Form("In case of problems please contact the SHUTTLE core team.\n\n");
2438         endBody += "Please do not answer this message directly, it is automatically generated.\n\n";
2439         endBody += "Sincerely yours,\n\n \t\t\tthe SHUTTLE\n";
2440
2441         AliDebug(2, Form("Body end: %s", endBody.Data()));
2442
2443         mailBody << endBody.Data();
2444
2445         mailBody.close();
2446
2447         // send mail!
2448         TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
2449                                                 subject.Data(),
2450                                                 cc.Data(),
2451                                                 to.Data(),
2452                                                 bodyFileName.Data());
2453         AliDebug(2, Form("mail command: %s", mailCommand.Data()));
2454
2455         Bool_t result = gSystem->Exec(mailCommand.Data());
2456
2457         return result == 0;
2458 }
2459
2460 //______________________________________________________________________________________________
2461 const char* AliShuttle::GetRunType()
2462 {
2463         //
2464         // returns run type read from "run type" logbook
2465         //
2466
2467         if(!fLogbookEntry) {
2468                 AliError("No logbook entry!");
2469                 return 0;
2470         }
2471
2472         return fLogbookEntry->GetRunType();
2473 }
2474
2475 //______________________________________________________________________________________________
2476 void AliShuttle::SetShuttleTempDir(const char* tmpDir)
2477 {
2478         //
2479         // sets Shuttle temp directory
2480         //
2481
2482         fgkShuttleTempDir = gSystem->ExpandPathName(tmpDir);
2483 }
2484
2485 //______________________________________________________________________________________________
2486 void AliShuttle::SetShuttleLogDir(const char* logDir)
2487 {
2488         //
2489         // sets Shuttle log directory
2490         //
2491
2492         fgkShuttleLogDir = gSystem->ExpandPathName(logDir);
2493 }