]> git.uio.no Git - u/mrichter/AliRoot.git/blob - SHUTTLE/AliShuttle.cxx
1) Storing of files to the Grid is now done _after_ your preprocessors succeeded...
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttle.cxx
1 /**************************************************************************
2  * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
3  *                                                                        *
4  * Author: The ALICE Off-line Project.                                    *
5  * Contributors are mentioned in the code where appropriate.              *
6  *                                                                        *
7  * Permission to use, copy, modify and distribute this software and its   *
8  * documentation strictly for non-commercial purposes is hereby granted   *
9  * without fee, provided that the above copyright notice appears in all   *
10  * copies and that both the copyright notice and this permission notice   *
11  * appear in the supporting documentation. The authors make no claims     *
12  * about the suitability of this software for any purpose. It is          *
13  * provided "as is" without express or implied warranty.                  *
14  **************************************************************************/
15
16 /*
17 $Log$
18 Revision 1.33  2007/04/03 13:56:01  acolla
19 Grid Storage at the end of preprocessing. Added virtual method to disable DCS query according to the
20 run type.
21
22 Revision 1.32  2007/02/28 10:41:56  acolla
23 Run type field added in SHUTTLE framework. Run type is read from "run type" logbook and retrieved by
24 AliPreprocessor::GetRunType() function.
25 Added some ldap definition files.
26
27 Revision 1.30  2007/02/13 11:23:21  acolla
28 Moved getters and setters of Shuttle's main OCDB/Reference, local
29 OCDB/Reference, temp and log folders to AliShuttleInterface
30
31 Revision 1.27  2007/01/30 17:52:42  jgrosseo
32 adding monalisa monitoring
33
34 Revision 1.26  2007/01/23 19:20:03  acolla
35 Removed old ldif files, added TOF, MCH ldif files. Added some options in
36 AliShuttleConfig::Print. Added in Ali Shuttle: SetShuttleTempDir and
37 SetShuttleLogDir
38
39 Revision 1.25  2007/01/15 19:13:52  acolla
40 Moved some AliInfo to AliDebug in SendMail function
41
42 Revision 1.21  2006/12/07 08:51:26  jgrosseo
43 update (alberto):
44 table, db names in ldap configuration
45 added GRP preprocessor
46 DCS data can also be retrieved by data point
47
48 Revision 1.20  2006/11/16 16:16:48  jgrosseo
49 introducing strict run ordering flag
50 removed giving preprocessor name to preprocessor, they have to know their name themselves ;-)
51
52 Revision 1.19  2006/11/06 14:23:04  jgrosseo
53 major update (Alberto)
54 o) reading of run parameters from the logbook
55 o) online offline naming conversion
56 o) standalone DCSclient package
57
58 Revision 1.18  2006/10/20 15:22:59  jgrosseo
59 o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
60 o) Merging Collect, CollectAll, CollectNew function
61 o) Removing implementation of empty copy constructors (declaration still there!)
62
63 Revision 1.17  2006/10/05 16:20:55  jgrosseo
64 adapting to new CDB classes
65
66 Revision 1.16  2006/10/05 15:46:26  jgrosseo
67 applying to the new interface
68
69 Revision 1.15  2006/10/02 16:38:39  jgrosseo
70 update (alberto):
71 fixed memory leaks
72 storing of objects that failed to be stored to the grid before
73 interfacing of shuttle status table in daq system
74
75 Revision 1.14  2006/08/29 09:16:05  jgrosseo
76 small update
77
78 Revision 1.13  2006/08/15 10:50:00  jgrosseo
79 effc++ corrections (alberto)
80
81 Revision 1.12  2006/08/08 14:19:29  jgrosseo
82 Update to shuttle classes (Alberto)
83
84 - Possibility to set the full object's path in the Preprocessor's and
85 Shuttle's  Store functions
86 - Possibility to extend the object's run validity in the same classes
87 ("startValidity" and "validityInfinite" parameters)
88 - Implementation of the StoreReferenceData function to store reference
89 data in a dedicated CDB storage.
90
91 Revision 1.11  2006/07/21 07:37:20  jgrosseo
92 last run is stored after each run
93
94 Revision 1.10  2006/07/20 09:54:40  jgrosseo
95 introducing status management: The processing per subdetector is divided into several steps,
96 after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
97 can keep track of the number of failures and skips further processing after a certain threshold is
98 exceeded. These thresholds can be configured in LDAP.
99
100 Revision 1.9  2006/07/19 10:09:55  jgrosseo
101 new configuration, accesst to DAQ FES (Alberto)
102
103 Revision 1.8  2006/07/11 12:44:36  jgrosseo
104 adding parameters for extended validity range of data produced by preprocessor
105
106 Revision 1.7  2006/07/10 14:37:09  jgrosseo
107 small fix + todo comment
108
109 Revision 1.6  2006/07/10 13:01:41  jgrosseo
110 enhanced storing of last sucessfully processed run (alberto)
111
112 Revision 1.5  2006/07/04 14:59:57  jgrosseo
113 revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
114
115 Revision 1.4  2006/06/12 09:11:16  jgrosseo
116 coding conventions (Alberto)
117
118 Revision 1.3  2006/06/06 14:26:40  jgrosseo
119 o) removed files that were moved to STEER
120 o) shuttle updated to follow the new interface (Alberto)
121
122 Revision 1.2  2006/03/07 07:52:34  hristov
123 New version (B.Yordanov)
124
125 Revision 1.6  2005/11/19 17:19:14  byordano
126 RetrieveDATEEntries and RetrieveConditionsData added
127
128 Revision 1.5  2005/11/19 11:09:27  byordano
129 AliShuttle declaration added
130
131 Revision 1.4  2005/11/17 17:47:34  byordano
132 TList changed to TObjArray
133
134 Revision 1.3  2005/11/17 14:43:23  byordano
135 import to local CVS
136
137 Revision 1.1.1.1  2005/10/28 07:33:58  hristov
138 Initial import as subdirectory in AliRoot
139
140 Revision 1.2  2005/09/13 08:41:15  byordano
141 default startTime endTime added
142
143 Revision 1.4  2005/08/30 09:13:02  byordano
144 some docs added
145
146 Revision 1.3  2005/08/29 21:15:47  byordano
147 some docs added
148
149 */
150
151 //
152 // This class is the main manager for AliShuttle. 
153 // It organizes the data retrieval from DCS and call the 
154 // interface methods of AliPreprocessor.
155 // For every detector in AliShuttleConfig (see AliShuttleConfig),
156 // data for its set of aliases is retrieved. If there is a registered
157 // AliPreprocessor for this detector then it will be used
158 // according to the schema (see AliPreprocessor).
159 // If there isn't a registered AliPreprocessor then the retrieved
160 // data is stored automatically to the underlying AliCDBStorage.
161 // For detSpec is used the alias name.
162 //
163
164 #include "AliShuttle.h"
165
166 #include "AliCDBManager.h"
167 #include "AliCDBStorage.h"
168 #include "AliCDBId.h"
169 #include "AliCDBRunRange.h"
170 #include "AliCDBPath.h"
171 #include "AliCDBEntry.h"
172 #include "AliShuttleConfig.h"
173 #include "DCSClient/AliDCSClient.h"
174 #include "AliLog.h"
175 #include "AliPreprocessor.h"
176 #include "AliShuttleStatus.h"
177 #include "AliShuttleLogbookEntry.h"
178
179 #include <TSystem.h>
180 #include <TObject.h>
181 #include <TString.h>
182 #include <TTimeStamp.h>
183 #include <TObjString.h>
184 #include <TSQLServer.h>
185 #include <TSQLResult.h>
186 #include <TSQLRow.h>
187 #include <TMutex.h>
188 #include <TSystemDirectory.h>
189 #include <TSystemFile.h>
190 #include <TFileMerger.h>
191 #include <TGrid.h>
192 #include <TGridResult.h>
193
194 #include <TMonaLisaWriter.h>
195
196 #include <fstream>
197
198 #include <sys/types.h>
199 #include <sys/wait.h>
200
201 ClassImp(AliShuttle)
202
203 //______________________________________________________________________________________________
204 AliShuttle::AliShuttle(const AliShuttleConfig* config,
205                 UInt_t timeout, Int_t retries):
206 fConfig(config),
207 fTimeout(timeout), fRetries(retries),
208 fPreprocessorMap(),
209 fLogbookEntry(0),
210 fCurrentDetector(),
211 fStatusEntry(0),
212 fMonitoringMutex(0),
213 fLastActionTime(0),
214 fLastAction(),
215 fMonaLisa(0),
216 fTestMode(kNone),
217 fReadTestMode(kFALSE)
218 {
219         //
220         // config: AliShuttleConfig used
221         // timeout: timeout used for AliDCSClient connection
222         // retries: the number of retries in case of connection error.
223         //
224
225         if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
226         for(int iSys=0;iSys<4;iSys++) {
227                 fServer[iSys]=0;
228                 if (iSys < 3)
229                         fFXSlist[iSys].SetOwner(kTRUE);
230         }
231         fPreprocessorMap.SetOwner(kTRUE);
232
233         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
234                 fFirstUnprocessed[iDet] = kFALSE;
235
236         fMonitoringMutex = new TMutex();
237 }
238
239 //______________________________________________________________________________________________
240 AliShuttle::~AliShuttle()
241 {
242         //
243         // destructor
244         //
245
246         fPreprocessorMap.DeleteAll();
247         for(int iSys=0;iSys<4;iSys++)
248                 if(fServer[iSys]) {
249                         fServer[iSys]->Close();
250                         delete fServer[iSys];
251                         fServer[iSys] = 0;
252                 }
253
254         if (fStatusEntry){
255                 delete fStatusEntry;
256                 fStatusEntry = 0;
257         }
258         
259         if (fMonitoringMutex) 
260         {
261                 delete fMonitoringMutex;
262                 fMonitoringMutex = 0;
263         }
264 }
265
266 //______________________________________________________________________________________________
267 void AliShuttle::RegisterPreprocessor(AliPreprocessor* preprocessor)
268 {
269         //
270         // Registers new AliPreprocessor.
271         // It uses GetName() for indentificator of the pre processor.
272         // The pre processor is registered it there isn't any other
273         // with the same identificator (GetName()).
274         //
275
276         const char* detName = preprocessor->GetName();
277         if(GetDetPos(detName) < 0)
278                 AliFatal(Form("********** !!!!! Invalid detector name: %s !!!!! **********", detName));
279
280         if (fPreprocessorMap.GetValue(detName)) {
281                 AliWarning(Form("AliPreprocessor %s is already registered!", detName));
282                 return;
283         }
284
285         fPreprocessorMap.Add(new TObjString(detName), preprocessor);
286 }
287 //______________________________________________________________________________________________
288 Bool_t AliShuttle::Store(const AliCDBPath& path, TObject* object,
289                 AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite)
290 {
291         // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for
292         // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored
293         // using this function. Use StoreReferenceData instead!
294         // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
295         // finishes the data are transferred to the main storage (Grid).
296
297         return StoreLocally(fgkLocalCDB, path, object, metaData, validityStart, validityInfinite);
298 }
299
300 //______________________________________________________________________________________________
301 Bool_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData)
302 {
303         // Stores a CDB object in the storage for reference data. This objects will not be available during
304         // offline reconstrunction. Use this function for reference data only!
305         // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
306         // finishes the data are transferred to the main storage (Grid).
307
308         return StoreLocally(fgkLocalRefStorage, path, object, metaData);
309 }
310
311 //______________________________________________________________________________________________
312 Bool_t AliShuttle::StoreLocally(const TString& localUri,
313                         const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
314                         Int_t validityStart, Bool_t validityInfinite)
315 {
316         // Store object temporarily in local storage. Parameters are passed by Store and StoreReferenceData functions.
317         // when the preprocessor finishes the data are transferred to the main storage (Grid).
318         // The parameters are:
319         //   1) Uri of the backup storage (Local)
320         //   2) the object's path.
321         //   3) the object to be stored
322         //   4) the metaData to be associated with the object
323         //   5) the validity start run number w.r.t. the current run,
324         //      if the data is valid only for this run leave the default 0
325         //   6) specifies if the calibration data is valid for infinity (this means until updated),
326         //      typical for calibration runs, the default is kFALSE
327         //
328         // returns 0 if fail, 1 otherwise
329
330         if (fTestMode & kErrorStorage)
331         {
332                 Log(fCurrentDetector, "StoreLocally - In TESTMODE - Simulating error while storing locally");
333                 return kFALSE;
334         }
335         
336         const char* cdbType = (localUri == fgkLocalCDB) ? "CDB" : "Reference";
337
338         Int_t firstRun = GetCurrentRun() - validityStart;
339         if(firstRun < 0) {
340                 AliWarning("First valid run happens to be less than 0! Setting it to 0.");
341                 firstRun=0;
342         }
343
344         Int_t lastRun = -1;
345         if(validityInfinite) {
346                 lastRun = AliCDBRunRange::Infinity();
347         } else {
348                 lastRun = GetCurrentRun();
349         }
350
351         // Version is set to current run, it will be used later to transfer data to Grid
352         AliCDBId id(path, firstRun, lastRun, GetCurrentRun(), -1);
353
354         if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){
355                 TObjString runUsed = Form("%d", GetCurrentRun());
356                 metaData->SetProperty("RunUsed(TObjString)", runUsed.Clone());
357         }
358
359         Bool_t result = kFALSE;
360
361         if (!(AliCDBManager::Instance()->GetStorage(localUri))) {
362                 Log("SHUTTLE", Form("StoreLocally - Cannot activate local %s storage", cdbType));
363         } else {
364                 result = AliCDBManager::Instance()->GetStorage(localUri)
365                                         ->Put(object, id, metaData);
366         }
367
368         if(!result) {
369
370                 Log(fCurrentDetector, Form("StoreLocally - Can't store object <%s>!", id.ToString().Data()));
371         }
372
373         return result;
374 }
375
376 //______________________________________________________________________________________________
377 Bool_t AliShuttle::StoreOCDB()
378 {
379         //
380         // Called when preprocessor ends successfully or when previous storage attempt failed (kStoreError status)
381         // Calls underlying StoreOCDB(const char*) function twice, for OCDB and Reference storage.
382         // Then calls StoreRefFilesToGrid to store reference files. 
383         //
384         
385         if (fTestMode & kErrorGrid)
386         {
387                 Log("SHUTTLE", "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
388                 Log(fCurrentDetector, "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
389                 return kFALSE;
390         }
391         
392         AliInfo("Storing OCDB data ...");
393         Bool_t resultCDB = StoreOCDB(fgkMainCDB);
394
395         AliInfo("Storing reference data ...");
396         Bool_t resultRef = StoreOCDB(fgkMainRefStorage);
397         
398         AliInfo("Storing reference files ...");
399         Bool_t resultRefFiles = StoreRefFilesToGrid();
400         
401         return resultCDB && resultRef && resultRefFiles;
402 }
403
404 //______________________________________________________________________________________________
405 Bool_t AliShuttle::StoreOCDB(const TString& gridURI)
406 {
407         //
408         // Called by StoreOCDB(), performs actual storage to the main OCDB and reference storages (Grid)
409         //
410
411         TObjArray* gridIds=0;
412
413         Bool_t result = kTRUE;
414
415         const char* type = 0;
416         TString localURI;
417         if(gridURI == fgkMainCDB) {
418                 type = "OCDB";
419                 localURI = fgkLocalCDB;
420         } else if(gridURI == fgkMainRefStorage) {
421                 type = "reference";
422                 localURI = fgkLocalRefStorage;
423         } else {
424                 AliError(Form("Invalid storage URI: %s", gridURI.Data()));
425                 return kFALSE;
426         }
427
428         AliCDBManager* man = AliCDBManager::Instance();
429
430         AliCDBStorage *gridSto = man->GetStorage(gridURI);
431         if(!gridSto) {
432                 Log("SHUTTLE",
433                         Form("StoreOCDB - cannot activate main %s storage", type));
434                 return kFALSE;
435         }
436
437         gridIds = gridSto->GetQueryCDBList();
438
439         // get objects previously stored in local CDB
440         AliCDBStorage *localSto = man->GetStorage(localURI);
441         if(!localSto) {
442                 Log("SHUTTLE",
443                         Form("StoreOCDB - cannot activate local %s storage", type));
444                 return kFALSE;
445         }
446         AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*");
447         // Local objects were stored with current run as Grid version!
448         TList* localEntries = localSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun());
449         localEntries->SetOwner(1);
450
451         // loop on local stored objects
452         TIter localIter(localEntries);
453         AliCDBEntry *aLocEntry = 0;
454         while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){
455                 aLocEntry->SetOwner(1);
456                 AliCDBId aLocId = aLocEntry->GetId();
457                 aLocEntry->SetVersion(-1);
458                 aLocEntry->SetSubVersion(-1);
459
460                 // If local object is valid up to infinity we store it only if it is
461                 // the first unprocessed run!
462                 if (aLocId.GetLastRun() == AliCDBRunRange::Infinity() &&
463                         !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
464                 {
465                         Log("SHUTTLE", Form("StoreOCDB - %s: object %s has validity infinite but "
466                                                 "there are previous unprocessed runs!",
467                                                 fCurrentDetector.Data(), aLocId.GetPath().Data()));
468                         continue;
469                 }
470
471                 // loop on Grid valid Id's
472                 Bool_t store = kTRUE;
473                 TIter gridIter(gridIds);
474                 AliCDBId* aGridId = 0;
475                 while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
476                         if(aGridId->GetPath() != aLocId.GetPath()) continue;
477                         // skip all objects valid up to infinity
478                         if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
479                         // if we get here, it means there's already some more recent object stored on Grid!
480                         store = kFALSE;
481                         break;
482                 }
483
484                 // If we get here, the file can be stored!
485                 Bool_t storeOk = gridSto->Put(aLocEntry);
486                 if(!store || storeOk){
487
488                         if (!store)
489                         {
490                                 Log(fCurrentDetector.Data(),
491                                         Form("StoreOCDB - A more recent object already exists in %s storage: <%s>",
492                                                 type, aGridId->ToString().Data()));
493                         } else {
494                                 Log("SHUTTLE",
495                                         Form("StoreOCDB - Object <%s> successfully put into %s storage",
496                                                 aLocId.ToString().Data(), type));
497                         }
498
499                         // removing local filename...
500                         TString filename;
501                         localSto->IdToFilename(aLocId, filename);
502                         AliInfo(Form("Removing local file %s", filename.Data()));
503                         RemoveFile(filename.Data());
504                         continue;
505                 } else  {
506                         Log("SHUTTLE",
507                                 Form("StoreOCDB - Grid %s storage of object <%s> failed",
508                                         type, aLocId.ToString().Data()));
509                         result = kFALSE;
510                 }
511         }
512         localEntries->Clear();
513
514         return result;
515 }
516
517 //______________________________________________________________________________________________
518 Bool_t AliShuttle::StoreReferenceFile(const char* detector, const char* localFile, const char* gridFileName)
519 {
520         //
521         // Stores reference file directly (without opening it). This function stores the file locally
522         // renaming it to #runNumber_gridFileName.
523         //
524         
525         if (fTestMode & kErrorStorage)
526         {
527                 Log(fCurrentDetector, "StoreReferenceFile - In TESTMODE - Simulating error while storing locally");
528                 return kFALSE;
529         }
530         
531         AliCDBManager* man = AliCDBManager::Instance();
532         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
533         
534         TString localBaseFolder = sto->GetBaseFolder();
535         
536         TString targetDir;
537         targetDir.Form("%s/%s", localBaseFolder.Data(), detector);
538         
539         TString target;
540         target.Form("%s/%d_%s", targetDir.Data(), GetCurrentRun(), gridFileName);
541         
542         Int_t result = gSystem->GetPathInfo(targetDir, 0, (Long64_t*) 0, 0, 0);
543         if (result)
544         {
545                 result = gSystem->mkdir(targetDir, kTRUE);
546                 if (result != 0)
547                 {
548                         Log("SHUTTLE", Form("StoreReferenceFile - Error creating base directory %s", targetDir.Data()));
549                         return kFALSE;
550                 }
551         }
552                 
553         result = gSystem->CopyFile(localFile, target);
554
555         if (result == 0)
556         {
557                 Log("SHUTTLE", Form("StoreReferenceFile - Stored file %s locally to %s", localFile, target.Data()));
558                 return kTRUE;
559         }
560         else
561         {
562                 Log("SHUTTLE", Form("StoreReferenceFile - Storing file %s locally to %s failed", localFile, target.Data()));
563                 return kFALSE;
564         }       
565 }
566
567 //______________________________________________________________________________________________
568 Bool_t AliShuttle::StoreRefFilesToGrid()
569 {
570         //
571         // Transfers the reference file to the Grid.
572         // The final full path of the file is:
573         // gridBaseReferenceFolder/DET/#runNumber_gridFileName
574         //
575         
576         AliCDBManager* man = AliCDBManager::Instance();
577         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
578         if (!sto)
579                 return kFALSE;
580         TString localBaseFolder = sto->GetBaseFolder();
581                 
582         TString dir;
583         dir.Form("%s/%s", localBaseFolder.Data(), fCurrentDetector.Data());
584         
585         AliCDBStorage* gridSto = man->GetStorage(fgkMainRefStorage);
586         if (!gridSto)
587                 return kFALSE;
588         TString gridBaseFolder = gridSto->GetBaseFolder();
589         TString alienDir;
590         alienDir.Form("%s%s", gridBaseFolder.Data(), fCurrentDetector.Data());
591         
592         if(!gGrid) 
593                 return kFALSE;
594         
595         // check that DET folder exists, otherwise create it
596         TGridResult* result = gGrid->Ls(alienDir.Data());
597         
598         if(!result)
599                 return kFALSE;
600         
601         if(!result->GetFileName(0)) {
602                 if(!gGrid->Mkdir(alienDir.Data(),"",0)){
603                         Log("SHUTTLE", Form("StoreRefFilesToGrid - Cannot create directory %s",
604                                         alienDir.Data()));
605                         return kFALSE;
606                 }
607                 
608         }
609
610         TString begin;
611         begin.Form("%d_", GetCurrentRun());
612         
613         TSystemDirectory* baseDir = new TSystemDirectory("/", dir);
614         TList* dirList            = baseDir->GetListOfFiles();
615         if (!dirList)
616                 return kTRUE;
617                 
618         Int_t nDirs               = dirList->GetEntries();
619         
620         Bool_t success = kTRUE;
621         
622         for (Int_t iDir=0; iDir<nDirs; ++iDir)
623         {
624                 TSystemFile* entry = dynamic_cast<TSystemFile*> (dirList->At(iDir));
625                 if (!entry)
626                         continue;
627                         
628                 if (entry->IsDirectory())
629                         continue;
630                         
631                 TString fileName(entry->GetName());
632                 if (!fileName.BeginsWith(begin))
633                         continue;
634                         
635                 TString fullLocalPath;
636                 fullLocalPath.Form("%s/%s", dir.Data(), fileName.Data());
637                 
638                 TString fullGridPath;
639                 fullGridPath.Form("alien://%s/%s", alienDir.Data(), fileName.Data());
640
641                 Log("SHUTTLE", Form("StoreRefFilesToGrid - Copying local file %s to %s", fullLocalPath.Data(), fullGridPath.Data()));
642                 
643                 TFileMerger fileMerger;
644                 Bool_t result = fileMerger.Cp(fullLocalPath, fullGridPath);
645                 
646                 if (result)
647                 {
648                         Log("SHUTTLE", Form("StoreRefFilesToGrid - Copying local file %s to %s succeeded", fullLocalPath.Data(), fullGridPath.Data()));
649                         RemoveFile(fullLocalPath);
650                 }
651                 else
652                 {
653                         Log("SHUTTLE", Form("StoreRefFilesToGrid - Copying local file %s to %s failed", fullLocalPath.Data(), fullGridPath.Data()));
654                         success = kFALSE;
655                 }
656         }
657         
658         delete baseDir;
659         
660         return success;
661 }
662
663 //______________________________________________________________________________________________
664 void AliShuttle::CleanLocalStorage(const TString& uri)
665 {
666         //
667         // Called in case the preprocessor is declared failed. Remove remaining objects from the local storages.
668         //
669
670         const char* type = 0;
671         if(uri == fgkLocalCDB) {
672                 type = "OCDB";
673         } else if(uri == fgkLocalRefStorage) {
674                 type = "reference";
675         } else {
676                 AliError(Form("Invalid storage URI: %s", uri.Data()));
677                 return;
678         }
679
680         AliCDBManager* man = AliCDBManager::Instance();
681
682         // open local storage
683         AliCDBStorage *localSto = man->GetStorage(uri);
684         if(!localSto) {
685                 Log("SHUTTLE",
686                         Form("CleanLocalStorage - cannot activate local %s storage", type));
687                 return;
688         }
689
690         TString filename(Form("%s/%s/*/Run*_v%d_s*.root",
691                 localSto->GetBaseFolder().Data(), fCurrentDetector.Data(), GetCurrentRun()));
692
693         AliInfo(Form("filename = %s", filename.Data()));
694
695         AliInfo(Form("Removing remaining local files from run %d and detector %s ...",
696                 GetCurrentRun(), fCurrentDetector.Data()));
697
698         RemoveFile(filename.Data());
699
700 }
701
702 //______________________________________________________________________________________________
703 void AliShuttle::RemoveFile(const char* filename)
704 {
705         //
706         // removes local file
707         //
708
709         TString command(Form("rm -f %s", filename));
710
711         Int_t result = gSystem->Exec(command.Data());
712         if(result != 0)
713         {
714                 Log("SHUTTLE", Form("RemoveFile - %s: Cannot remove file %s!",
715                         fCurrentDetector.Data(), filename));
716         }
717 }
718
719 //______________________________________________________________________________________________
720 AliShuttleStatus* AliShuttle::ReadShuttleStatus()
721 {
722         //
723         // Reads the AliShuttleStatus from the CDB
724         //
725
726         if (fStatusEntry){
727                 delete fStatusEntry;
728                 fStatusEntry = 0;
729         }
730
731         fStatusEntry = AliCDBManager::Instance()->GetStorage(GetLocalCDB())
732                 ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
733
734         if (!fStatusEntry) return 0;
735         fStatusEntry->SetOwner(1);
736
737         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
738         if (!status) {
739                 AliError("Invalid object stored to CDB!");
740                 return 0;
741         }
742
743         return status;
744 }
745
746 //______________________________________________________________________________________________
747 Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status)
748 {
749         //
750         // writes the status for one subdetector
751         //
752
753         if (fStatusEntry){
754                 delete fStatusEntry;
755                 fStatusEntry = 0;
756         }
757
758         Int_t run = GetCurrentRun();
759
760         AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
761
762         fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
763         fStatusEntry->SetOwner(1);
764
765         UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
766
767         if (!result) {
768                 Log("SHUTTLE", Form("WriteShuttleStatus - Failed for %s, run %d",
769                                                 fCurrentDetector.Data(), run));
770                 return kFALSE;
771         }
772         
773         SendMLInfo();
774
775         return kTRUE;
776 }
777
778 //______________________________________________________________________________________________
779 void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount)
780 {
781         //
782         // changes the AliShuttleStatus for the given detector and run to the given status
783         //
784
785         if (!fStatusEntry){
786                 AliError("UNEXPECTED: fStatusEntry empty");
787                 return;
788         }
789
790         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
791
792         if (!status){
793                 Log("SHUTTLE", "UNEXPECTED: status could not be read from current CDB entry");
794                 return;
795         }
796
797         TString actionStr = Form("UpdateShuttleStatus - %s: Changing state from %s to %s",
798                                 fCurrentDetector.Data(),
799                                 status->GetStatusName(),
800                                 status->GetStatusName(newStatus));
801         Log("SHUTTLE", actionStr);
802         SetLastAction(actionStr);
803
804         status->SetStatus(newStatus);
805         if (increaseCount) status->IncreaseCount();
806
807         AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
808
809         SendMLInfo();
810 }
811
812 //______________________________________________________________________________________________
813 void AliShuttle::SendMLInfo()
814 {
815         //
816         // sends ML information about the current status of the current detector being processed
817         //
818         
819         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
820         
821         if (!status){
822                 Log("SHUTTLE", "SendMLInfo - UNEXPECTED: status could not be read from current CDB entry");
823                 return;
824         }
825         
826         TMonaLisaText  mlStatus(Form("%s_status", fCurrentDetector.Data()), status->GetStatusName());
827         TMonaLisaValue mlRetryCount(Form("%s_count", fCurrentDetector.Data()), status->GetCount());
828
829         TList mlList;
830         mlList.Add(&mlStatus);
831         mlList.Add(&mlRetryCount);
832
833         fMonaLisa->SendParameters(&mlList);
834 }
835
836 //______________________________________________________________________________________________
837 Bool_t AliShuttle::ContinueProcessing()
838 {
839         // this function reads the AliShuttleStatus information from CDB and
840         // checks if the processing should be continued
841         // if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
842
843         if (!fConfig->HostProcessDetector(fCurrentDetector)) return kFALSE;
844
845         AliPreprocessor* aPreprocessor =
846                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
847         if (!aPreprocessor)
848         {
849                 AliInfo(Form("%s: no preprocessor registered", fCurrentDetector.Data()));
850                 return kFALSE;
851         }
852
853         AliShuttleLogbookEntry::Status entryStatus =
854                 fLogbookEntry->GetDetectorStatus(fCurrentDetector);
855
856         if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
857                 AliInfo(Form("ContinueProcessing - %s is %s",
858                                 fCurrentDetector.Data(),
859                                 fLogbookEntry->GetDetectorStatusName(entryStatus)));
860                 return kFALSE;
861         }
862
863         // if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state
864
865         // check if current run is first unprocessed run for current detector
866         if (fConfig->StrictRunOrder(fCurrentDetector) &&
867                 !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
868         {
869                 Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering but this is not the first unprocessed run!"));
870                 return kFALSE;
871         }
872
873         AliShuttleStatus* status = ReadShuttleStatus();
874         if (!status) {
875                 // first time
876                 Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
877                                 fCurrentDetector.Data()));
878                 status = new AliShuttleStatus(AliShuttleStatus::kStarted);
879                 return WriteShuttleStatus(status);
880         }
881
882         // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
883         // If it happens it may mean Logbook updating failed... let's do it now!
884         if (status->GetStatus() == AliShuttleStatus::kDone ||
885             status->GetStatus() == AliShuttleStatus::kFailed){
886                 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
887                                         fCurrentDetector.Data(),
888                                         status->GetStatusName(status->GetStatus())));
889                 UpdateShuttleLogbook(fCurrentDetector.Data(),
890                                         status->GetStatusName(status->GetStatus()));
891                 return kFALSE;
892         }
893
894         if (status->GetStatus() == AliShuttleStatus::kStoreError) {
895                 Log("SHUTTLE",
896                         Form("ContinueProcessing - %s: Grid storage of one or more objects failed. Trying again now",
897                                 fCurrentDetector.Data()));
898                 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
899                 if (StoreOCDB()){
900                         Log("SHUTTLE", Form("ContinueProcessing - %s: all objects successfully stored into main storage",
901                                 fCurrentDetector.Data()));
902                         UpdateShuttleStatus(AliShuttleStatus::kDone);
903                         UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE");
904                 } else {
905                         Log("SHUTTLE",
906                                 Form("ContinueProcessing - %s: Grid storage failed again",
907                                         fCurrentDetector.Data()));
908                         UpdateShuttleStatus(AliShuttleStatus::kStoreError);
909                 }
910                 return kFALSE;
911         }
912
913         // if we get here, there is a restart
914         Bool_t cont = kFALSE;
915
916         // abort conditions
917         if (status->GetCount() >= fConfig->GetMaxRetries()) {
918                 Log("SHUTTLE", Form("ContinueProcessing - %s failed %d times in status %s - "
919                                 "Updating Shuttle Logbook", fCurrentDetector.Data(),
920                                 status->GetCount(), status->GetStatusName()));
921                 UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
922                 UpdateShuttleStatus(AliShuttleStatus::kFailed);
923
924                 // there may still be objects in local OCDB and reference storage
925                 // and FXS databases may be not updated: do it now!
926                 
927                 // TODO Currently disabled, we want to keep files in case of failure!
928                 // CleanLocalStorage(fgkLocalCDB);
929                 // CleanLocalStorage(fgkLocalRefStorage);
930                 // UpdateTableFailCase();
931                 
932                 // Send mail to detector expert!
933                 AliInfo(Form("Sending mail to %s expert...", fCurrentDetector.Data()));
934                 if (!SendMail())
935                         Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert",
936                                         fCurrentDetector.Data()));
937
938         } else {
939                 Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. "
940                                 "Aborted before with %s. Retry number %d.", fCurrentDetector.Data(),
941                                 status->GetStatusName(), status->GetCount()));
942                 Bool_t increaseCount = kTRUE;
943                 if (status->GetStatus() == AliShuttleStatus::kDCSError || status->GetStatus() == AliShuttleStatus::kDCSStarted)
944                         increaseCount = kFALSE;
945                 UpdateShuttleStatus(AliShuttleStatus::kStarted, increaseCount);
946                 cont = kTRUE;
947         }
948
949         return cont;
950 }
951
952 //______________________________________________________________________________________________
953 Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
954 {
955         //
956         // Makes data retrieval for all detectors in the configuration.
957         // entry: Shuttle logbook entry, contains run paramenters and status of detectors
958         // (Unprocessed, Inactive, Failed or Done).
959         // Returns kFALSE in case of error occured and kTRUE otherwise
960         //
961
962         if (!entry) return kFALSE;
963
964         fLogbookEntry = entry;
965
966         AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^* \n",
967                                         GetCurrentRun()));
968
969         // create ML instance that monitors this run
970         fMonaLisa = new TMonaLisaWriter(Form("%d", GetCurrentRun()), "SHUTTLE", "aliendb1.cern.ch");
971         // disable monitoring of other parameters that come e.g. from TFile
972         gMonitoringWriter = 0;
973
974         // Send the information to ML
975         TMonaLisaText  mlStatus("SHUTTLE_status", "Processing");
976         TMonaLisaText  mlRunType("SHUTTLE_runtype", Form("%s (%s)", entry->GetRunType(), entry->GetRunParameter("log")));
977
978         TList mlList;
979         mlList.Add(&mlStatus);
980         mlList.Add(&mlRunType);
981
982         fMonaLisa->SendParameters(&mlList);
983
984         if (fLogbookEntry->IsDone())
985         {
986                 Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook");
987                 UpdateShuttleLogbook("shuttle_done");
988                 fLogbookEntry = 0;
989                 return kTRUE;
990         }
991
992         // read test mode if flag is set
993         if (fReadTestMode)
994         {
995                 TString logEntry(entry->GetRunParameter("log"));
996                 //printf("log entry = %s\n", logEntry.Data());
997                 TString searchStr("Testmode: ");
998                 Int_t pos = logEntry.Index(searchStr.Data());
999                 //printf("%d\n", pos);
1000                 if (pos >= 0)
1001                 {
1002                         TSubString subStr = logEntry(pos + searchStr.Length(), logEntry.Length());
1003                         //printf("%s\n", subStr.String().Data());
1004                         TString newStr(subStr.Data());
1005                         TObjArray* token = newStr.Tokenize(' ');
1006                         if (token)
1007                         {
1008                                 //token->Print();
1009                                 TObjString* tmpStr = dynamic_cast<TObjString*> (token->First());
1010                                 if (tmpStr)
1011                                 {
1012                                         Int_t testMode = tmpStr->String().Atoi();
1013                                         if (testMode > 0)
1014                                         {
1015                                                 Log("SHUTTLE", Form("Enabling test mode %d", testMode));
1016                                                 SetTestMode((TestMode) testMode);
1017                                         }
1018                                 }
1019                                 delete token;          
1020                         }
1021                 }
1022         }
1023         
1024         fLogbookEntry->Print("all");
1025
1026         // Initialization
1027         Bool_t hasError = kFALSE;
1028
1029         AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
1030         if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
1031         AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
1032         if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
1033
1034         // Loop on detectors in the configuration
1035         TIter iter(fConfig->GetDetectors());
1036         TObjString* aDetector = 0;
1037
1038         while ((aDetector = (TObjString*) iter.Next()))
1039         {
1040                 fCurrentDetector = aDetector->String();
1041
1042                 if (ContinueProcessing() == kFALSE) continue;
1043
1044                 AliInfo(Form("\n\n \t\t\t****** run %d - %s: START  ******",
1045                                                 GetCurrentRun(), aDetector->GetName()));
1046
1047                 for(Int_t iSys=0;iSys<3;iSys++) fFXSCalled[iSys]=kFALSE;
1048
1049                 Log(fCurrentDetector.Data(), "Starting processing");
1050
1051                 Int_t pid = fork();
1052
1053                 if (pid < 0)
1054                 {
1055                         Log("SHUTTLE", "ERROR: Forking failed");
1056                 }
1057                 else if (pid > 0)
1058                 {
1059                         // parent
1060                         AliInfo(Form("In parent process of %d - %s: Starting monitoring",
1061                                                         GetCurrentRun(), aDetector->GetName()));
1062
1063                         Long_t begin = time(0);
1064
1065                         int status; // to be used with waitpid, on purpose an int (not Int_t)!
1066                         while (waitpid(pid, &status, WNOHANG) == 0)
1067                         {
1068                                 Long_t expiredTime = time(0) - begin;
1069
1070                                 if (expiredTime > fConfig->GetPPTimeOut())
1071                                 {
1072                                         TString tmp;
1073                                         tmp.Form("Process of %s time out. Run time: %d seconds. Killing...",
1074                                                                 fCurrentDetector.Data(), expiredTime);
1075                                         Log("SHUTTLE", tmp);
1076                                         Log(fCurrentDetector, tmp);
1077
1078                                         kill(pid, 9);
1079
1080                                         UpdateShuttleStatus(AliShuttleStatus::kPPTimeOut);
1081                                         hasError = kTRUE;
1082
1083                                         gSystem->Sleep(1000);
1084                                 }
1085                                 else
1086                                 {
1087                                         if (expiredTime % 60 == 0)
1088                                                 Log("SHUTTLE", Form("Checked process. Run time: %d seconds.",
1089                                                                 expiredTime));
1090                                         gSystem->Sleep(1000);
1091                                         
1092                                         TString checkStr;
1093                                         checkStr.Form("ps -o vsize --pid %d | tail -n 1", pid);
1094                                         FILE* pipe = gSystem->OpenPipe(checkStr, "r");
1095                                         if (!pipe)
1096                                         {
1097                                                 Log("SHUTTLE", Form("Error: Could not open pipe to %s", checkStr.Data()));
1098                                                 continue;
1099                                         }
1100                                                 
1101                                         char buffer[100];
1102                                         if (!fgets(buffer, 100, pipe))
1103                                         {
1104                                                 Log("SHUTTLE", "Error: ps did not return anything");
1105                                                 gSystem->ClosePipe(pipe);
1106                                                 continue;
1107                                         }
1108                                         gSystem->ClosePipe(pipe);
1109                                         
1110                                         //Log("SHUTTLE", Form("ps returned %s", buffer));
1111                                         
1112                                         Int_t mem = 0;
1113                                         if ((sscanf(buffer, "%d\n", &mem) != 1) || !mem)
1114                                         {
1115                                                 Log("SHUTTLE", "Error: Could not parse output of ps");
1116                                                 continue;
1117                                         }
1118                                         
1119                                         if (expiredTime % 60 == 0)
1120                                                 Log("SHUTTLE", Form("The process consumes %d KB of memory.", mem));
1121                                         
1122                                         if (mem > fConfig->GetPPMaxMem())
1123                                         {
1124                                                 TString tmp;
1125                                                 tmp.Form("Process exceeds maximum allowed memory (%d KB > %d KB). Killing...",
1126                                                         mem, fConfig->GetPPMaxMem());
1127                                                 Log("SHUTTLE", tmp);
1128                                                 Log(fCurrentDetector, tmp);
1129         
1130                                                 kill(pid, 9);
1131         
1132                                                 UpdateShuttleStatus(AliShuttleStatus::kPPOutOfMemory);
1133                                                 hasError = kTRUE;
1134         
1135                                                 gSystem->Sleep(1000);
1136                                         }
1137                                 }
1138                         }
1139
1140                         AliInfo(Form("In parent process of %d - %s: Client has terminated.",
1141                                                                 GetCurrentRun(), aDetector->GetName()));
1142
1143                         if (WIFEXITED(status))
1144                         {
1145                                 Int_t returnCode = WEXITSTATUS(status);
1146
1147                                 Log("SHUTTLE", Form("%s: the return code is %d", fCurrentDetector.Data(),
1148                                                                                 returnCode));
1149
1150                                 if (returnCode == 0) hasError = kTRUE;
1151                         }
1152                 }
1153                 else if (pid == 0)
1154                 {
1155                         // client
1156                         AliInfo(Form("In client process of %d - %s", GetCurrentRun(), aDetector->GetName()));
1157
1158                         Bool_t success = ProcessCurrentDetector();
1159                         if (success) // Preprocessor finished successfully!
1160                         { 
1161                                 // Update time_processed field in FXS DB
1162                                 if (UpdateTable() == kFALSE)
1163                                         Log("SHUTTLE", Form("Process - %s: Could not update FXS databases!"));
1164
1165                                 // Transfer the data from local storage to main storage (Grid)
1166                                 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
1167                                 if (StoreOCDB() == kFALSE)
1168                                 {
1169                                         AliInfo(Form("\n \t\t\t****** run %d - %s: STORAGE ERROR ****** \n\n",
1170                                                         GetCurrentRun(), aDetector->GetName()));
1171                                         UpdateShuttleStatus(AliShuttleStatus::kStoreError);
1172                                         success = kFALSE;
1173                                 } else {
1174                                         AliInfo(Form("\n \t\t\t****** run %d - %s: DONE ****** \n\n",
1175                                                         GetCurrentRun(), aDetector->GetName()));
1176                                         UpdateShuttleStatus(AliShuttleStatus::kDone);
1177                                         UpdateShuttleLogbook(fCurrentDetector, "DONE");
1178                                 }
1179                         }
1180
1181                         for (UInt_t iSys=0; iSys<3; iSys++)
1182                         {
1183                                 if (fFXSCalled[iSys]) fFXSlist[iSys].Clear();
1184                         }
1185
1186                         AliInfo(Form("Client process of %d - %s is exiting now with %d.",
1187                                                         GetCurrentRun(), aDetector->GetName(), success));
1188
1189                         // the client exits here
1190                         gSystem->Exit(success);
1191
1192                         AliError("We should never get here!!!");
1193                 }
1194         }
1195
1196         AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^* \n",
1197                                                         GetCurrentRun()));
1198
1199         //check if shuttle is done for this run, if so update logbook
1200         TObjArray checkEntryArray;
1201         checkEntryArray.SetOwner(1);
1202         TString whereClause = Form("where run=%d", GetCurrentRun());
1203         if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || checkEntryArray.GetEntries() == 0) {
1204                 Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!",
1205                                                 GetCurrentRun()));
1206                 return hasError == kFALSE;
1207         }
1208
1209         AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*>
1210                                                 (checkEntryArray.At(0));
1211
1212         if (checkEntry)
1213         {
1214                 if (checkEntry->IsDone())
1215                 {
1216                         Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook");
1217                         UpdateShuttleLogbook("shuttle_done");
1218                 }
1219                 else
1220                 {
1221                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1222                         {
1223                                 if (checkEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
1224                                 {
1225                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
1226                                                         checkEntry->GetRun(), GetDetName(iDet)));
1227                                         fFirstUnprocessed[iDet] = kFALSE;
1228                                 }
1229                         }
1230                 }
1231         }
1232
1233         // remove ML instance
1234         delete fMonaLisa;
1235         fMonaLisa = 0;
1236
1237         fLogbookEntry = 0;
1238
1239         return hasError == kFALSE;
1240 }
1241
1242 //______________________________________________________________________________________________
1243 Bool_t AliShuttle::ProcessCurrentDetector()
1244 {
1245         //
1246         // Makes data retrieval just for a specific detector (fCurrentDetector).
1247         // Threre should be a configuration for this detector.
1248
1249         AliInfo(Form("Retrieving values for %s, run %d", fCurrentDetector.Data(), GetCurrentRun()));
1250
1251         TMap dcsMap;
1252         dcsMap.SetOwner(1);
1253
1254         Bool_t aDCSError = kFALSE;
1255
1256         // call preprocessor
1257         AliPreprocessor* aPreprocessor =
1258                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
1259
1260         aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
1261
1262         Bool_t processDCS = aPreprocessor->ProcessDCS();
1263
1264         if (!processDCS || fTestMode & kSkipDCS)
1265         {
1266                 AliInfo("In TESTMODE - Skipping DCS processing!");
1267         } 
1268         else if (fTestMode & kErrorDCS)
1269         {
1270                 AliInfo("In TESTMODE - Simulating DCS error");
1271                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1272                 return kFALSE;
1273         } else {
1274
1275                 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
1276
1277                 TString host(fConfig->GetDCSHost(fCurrentDetector));
1278                 Int_t port = fConfig->GetDCSPort(fCurrentDetector);
1279
1280                 // Retrieval of Aliases
1281                 TObjString* anAlias = 0;
1282                 Int_t iAlias = 1;
1283                 Int_t nTotAliases= ((TMap*)fConfig->GetDCSAliases(fCurrentDetector))->GetEntries();
1284                 TIter iterAliases(fConfig->GetDCSAliases(fCurrentDetector));
1285                 while ((anAlias = (TObjString*) iterAliases.Next()))
1286                 {
1287                         TObjArray *valueSet = new TObjArray();
1288                         valueSet->SetOwner(1);
1289
1290                         if (((iAlias-1) % 500) == 0 || iAlias == nTotAliases)
1291                                 AliInfo(Form("Querying DCS archive: alias %s (%d of %d)",
1292                                                 anAlias->GetName(), iAlias++, nTotAliases));
1293                         aDCSError = (GetValueSet(host, port, anAlias->String(), valueSet, kAlias) == 0);
1294
1295                         if(!aDCSError)
1296                         {
1297                                 dcsMap.Add(anAlias->Clone(), valueSet);
1298                         } else {
1299                                 Log(fCurrentDetector,
1300                                         Form("ProcessCurrentDetector - Error while retrieving alias %s",
1301                                                 anAlias->GetName()));
1302                                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1303                                 dcsMap.DeleteAll();
1304                                 return kFALSE;
1305                         }
1306                 }
1307
1308                 // Retrieval of Data Points
1309                 TObjString* aDP = 0;
1310                 Int_t iDP = 0;
1311                 Int_t nTotDPs= ((TMap*)fConfig->GetDCSDataPoints(fCurrentDetector))->GetEntries();
1312                 TIter iterDP(fConfig->GetDCSDataPoints(fCurrentDetector));
1313                 while ((aDP = (TObjString*) iterDP.Next()))
1314                 {
1315                         TObjArray *valueSet = new TObjArray();
1316                         valueSet->SetOwner(1);
1317                         if (((iDP-1) % 500) == 0 || iDP == nTotDPs)
1318                                 AliInfo(Form("Querying DCS archive: DP %s (%d of %d)",
1319                                                 aDP->GetName(), iDP++, nTotDPs));
1320                         aDCSError = (GetValueSet(host, port, aDP->String(), valueSet, kDP) == 0);
1321
1322                         if(!aDCSError)
1323                         {
1324                                 dcsMap.Add(aDP->Clone(), valueSet);
1325                         } else {
1326                                 Log(fCurrentDetector,
1327                                         Form("ProcessCurrentDetector - Error while retrieving data point %s",
1328                                                 aDP->GetName()));
1329                                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1330                                 dcsMap.DeleteAll();
1331                                 return kFALSE;
1332                         }
1333                 }
1334         }
1335
1336         // DCS Archive DB processing successful. Call Preprocessor!
1337         UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
1338
1339         UInt_t returnValue = aPreprocessor->Process(&dcsMap);
1340
1341         if (returnValue > 0) // Preprocessor error!
1342         {
1343                 Log(fCurrentDetector, Form("Preprocessor failed. Process returned %d.", returnValue));
1344                 UpdateShuttleStatus(AliShuttleStatus::kPPError);
1345                 dcsMap.DeleteAll();
1346                 return kFALSE;
1347         }
1348         
1349         // preprocessor ok!
1350         UpdateShuttleStatus(AliShuttleStatus::kPPDone);
1351         Log(fCurrentDetector, Form("ProcessCurrentDetector - %s preprocessor returned success",
1352                                 fCurrentDetector.Data()));
1353
1354         dcsMap.DeleteAll();
1355
1356         return kTRUE;
1357 }
1358
1359 //______________________________________________________________________________________________
1360 Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
1361                 TObjArray& entries)
1362 {
1363         // Query DAQ's Shuttle logbook and fills detector status object.
1364         // Call QueryRunParameters to query DAQ logbook for run parameters.
1365         //
1366
1367         entries.SetOwner(1);
1368
1369         // check connection, in case connect
1370         if(!Connect(3)) return kFALSE;
1371
1372         TString sqlQuery;
1373         sqlQuery = Form("select * from %s %s order by run", fConfig->GetShuttlelbTable(), whereClause);
1374
1375         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
1376         if (!aResult) {
1377                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
1378                 return kFALSE;
1379         }
1380
1381         AliDebug(2,Form("Query = %s", sqlQuery.Data()));
1382
1383         if(aResult->GetRowCount() == 0) {
1384                 AliInfo("No entries in Shuttle Logbook match request");
1385                 delete aResult;
1386                 return kTRUE;
1387         }
1388
1389         // TODO Check field count!
1390         const UInt_t nCols = 22;
1391         if (aResult->GetFieldCount() != (Int_t) nCols) {
1392                 AliError("Invalid SQL result field number!");
1393                 delete aResult;
1394                 return kFALSE;
1395         }
1396
1397         TSQLRow* aRow;
1398         while ((aRow = aResult->Next())) {
1399                 TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
1400                 Int_t run = runString.Atoi();
1401
1402                 AliShuttleLogbookEntry *entry = QueryRunParameters(run);
1403                 if (!entry)
1404                         continue;
1405
1406                 // loop on detectors
1407                 for(UInt_t ii = 0; ii < nCols; ii++)
1408                         entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii));
1409
1410                 entries.AddLast(entry);
1411                 delete aRow;
1412         }
1413
1414         delete aResult;
1415         return kTRUE;
1416 }
1417
1418 //______________________________________________________________________________________________
1419 AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
1420 {
1421         //
1422         // Retrieve run parameters written in the DAQ logbook and sets them into AliShuttleLogbookEntry object
1423         //
1424
1425         // check connection, in case connect
1426         if (!Connect(3))
1427                 return 0;
1428
1429         TString sqlQuery;
1430         sqlQuery.Form("select * from %s where run=%d", fConfig->GetDAQlbTable(), run);
1431
1432         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
1433         if (!aResult) {
1434                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
1435                 return 0;
1436         }
1437
1438         if (aResult->GetRowCount() == 0) {
1439                 Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run));
1440                 delete aResult;
1441                 return 0;
1442         }
1443
1444         if (aResult->GetRowCount() > 1) {
1445                 AliError(Form("More than one entry in DAQ Logbook for run %d. Skipping", run));
1446                 delete aResult;
1447                 return 0;
1448         }
1449
1450         TSQLRow* aRow = aResult->Next();
1451         if (!aRow)
1452         {
1453                 AliError(Form("Could not retrieve row for run %d. Skipping", run));
1454                 delete aResult;
1455                 return 0;
1456         }
1457
1458         AliShuttleLogbookEntry* entry = new AliShuttleLogbookEntry(run);
1459
1460         for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
1461                 entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
1462
1463         UInt_t startTime = entry->GetStartTime();
1464         UInt_t endTime = entry->GetEndTime();
1465
1466         if (!startTime || !endTime || startTime > endTime) {
1467                 Log("SHUTTLE",
1468                         Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d",
1469                                 run, startTime, endTime));
1470                 delete entry;
1471                 delete aRow;
1472                 delete aResult;
1473                 return 0;
1474         }
1475
1476         delete aRow;
1477         delete aResult;
1478
1479         return entry;
1480 }
1481
1482 //______________________________________________________________________________________________
1483 Bool_t AliShuttle::GetValueSet(const char* host, Int_t port, const char* entry,
1484                                 TObjArray* valueSet, DCSType type)
1485 {
1486         // Retrieve all "entry" data points from the DCS server
1487         // host, port: TSocket connection parameters
1488         // entry: name of the alias or data point
1489         // valueSet: array of retrieved AliDCSValue's
1490         // type: kAlias or kDP
1491
1492         AliDCSClient client(host, port, fTimeout, fRetries);
1493         if (!client.IsConnected())
1494         {
1495                 return kFALSE;
1496         }
1497
1498         Int_t result=0;
1499
1500         if (type == kAlias)
1501         {
1502                 result = client.GetAliasValues(entry,
1503                         GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1504         } else
1505         if (type == kDP)
1506         {
1507                 result = client.GetDPValues(entry,
1508                         GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1509         }
1510
1511         if (result < 0)
1512         {
1513                 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get '%s'! Reason: %s",
1514                         entry, AliDCSClient::GetErrorString(result)));
1515
1516                 if (result == AliDCSClient::fgkServerError)
1517                 {
1518                         Log(fCurrentDetector.Data(), Form("GetValueSet - Server error: %s",
1519                                 client.GetServerError().Data()));
1520                 }
1521
1522                 return kFALSE;
1523         }
1524
1525         return kTRUE;
1526 }
1527
1528 //______________________________________________________________________________________________
1529 const char* AliShuttle::GetFile(Int_t system, const char* detector,
1530                 const char* id, const char* source)
1531 {
1532         // Get calibration file from file exchange servers
1533         // First queris the FXS database for the file name, using the run, detector, id and source info
1534         // then calls RetrieveFile(filename) for actual copy to local disk
1535         // run: current run being processed (given by Logbook entry fLogbookEntry)
1536         // detector: the Preprocessor name
1537         // id: provided as a parameter by the Preprocessor
1538         // source: provided by the Preprocessor through GetFileSources function
1539
1540         // check if test mode should simulate a FXS error
1541         if (fTestMode & kErrorFXSFiles)
1542         {
1543                 Log(detector, Form("GetFile - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
1544                 return 0;
1545         }
1546         
1547         // check connection, in case connect
1548         if (!Connect(system))
1549         {
1550                 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
1551                 return 0;
1552         }
1553
1554         // Query preparation
1555         TString sourceName(source);
1556         Int_t nFields = 3;
1557         TString sqlQueryStart = Form("select filePath,size,fileChecksum from %s where",
1558                                                                 fConfig->GetFXSdbTable(system));
1559         TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1560                                                                 GetCurrentRun(), detector, id);
1561
1562         if (system == kDAQ)
1563         {
1564                 whereClause += Form(" and DAQsource=\"%s\"", source);
1565         }
1566         else if (system == kDCS)
1567         {
1568                 sourceName="none";
1569         }
1570         else if (system == kHLT)
1571         {
1572                 whereClause += Form(" and DDLnumbers=\"%s\"", source);
1573                 nFields = 3;
1574         }
1575
1576         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1577
1578         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1579
1580         // Query execution
1581         TSQLResult* aResult = 0;
1582         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
1583         if (!aResult) {
1584                 Log(detector, Form("GetFileName - Can't execute SQL query to %s database for: id = %s, source = %s",
1585                                 GetSystemName(system), id, sourceName.Data()));
1586                 return 0;
1587         }
1588
1589         if(aResult->GetRowCount() == 0)
1590         {
1591                 Log(detector,
1592                         Form("GetFileName - No entry in %s FXS db for: id = %s, source = %s",
1593                                 GetSystemName(system), id, sourceName.Data()));
1594                 delete aResult;
1595                 return 0;
1596         }
1597
1598         if (aResult->GetRowCount() > 1) {
1599                 Log(detector,
1600                         Form("GetFileName - More than one entry in %s FXS db for: id = %s, source = %s",
1601                                 GetSystemName(system), id, sourceName.Data()));
1602                 delete aResult;
1603                 return 0;
1604         }
1605
1606         if (aResult->GetFieldCount() != nFields) {
1607                 Log(detector,
1608                         Form("GetFileName - Wrong field count in %s FXS db for: id = %s, source = %s",
1609                                 GetSystemName(system), id, sourceName.Data()));
1610                 delete aResult;
1611                 return 0;
1612         }
1613
1614         TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
1615
1616         if (!aRow){
1617                 Log(detector, Form("GetFileName - Empty set result in %s FXS db from query: id = %s, source = %s",
1618                                 GetSystemName(system), id, sourceName.Data()));
1619                 delete aResult;
1620                 return 0;
1621         }
1622
1623         TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
1624         TString fileSize(aRow->GetField(1), aRow->GetFieldLength(1));
1625         TString fileChecksum(aRow->GetField(2), aRow->GetFieldLength(2));
1626
1627         delete aResult;
1628         delete aRow;
1629
1630         AliDebug(2, Form("filePath = %s; size = %s, fileChecksum = %s",
1631                                 filePath.Data(), fileSize.Data(), fileChecksum.Data()));
1632
1633         // retrieved file is renamed to make it unique
1634         TString localFileName = Form("%s_%s_%d_%s_%s.shuttle",
1635                                         GetSystemName(system), detector, GetCurrentRun(), id, sourceName.Data());
1636
1637
1638         // file retrieval from FXS
1639         UInt_t nRetries = 0;
1640         UInt_t maxRetries = 3;
1641         Bool_t result = kFALSE;
1642
1643         // copy!! if successful TSystem::Exec returns 0
1644         while(nRetries++ < maxRetries) {
1645                 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
1646                 result = RetrieveFile(system, filePath.Data(), localFileName.Data());
1647                 if(!result)
1648                 {
1649                         Log(detector, Form("GetFileName - Copy of file %s from %s FXS failed",
1650                                         filePath.Data(), GetSystemName(system)));
1651                         continue;
1652                 } else {
1653                         AliInfo(Form("File %s copied from %s FXS into %s/%s",
1654                                                 filePath.Data(), GetSystemName(system),
1655                                                 GetShuttleTempDir(), localFileName.Data()));
1656                 }
1657
1658                 if (fileChecksum.Length()>0)
1659                 {
1660                         // compare md5sum of local file with the one stored in the FXS DB
1661                         Int_t md5Comp = gSystem->Exec(Form("md5sum %s/%s |grep %s 2>&1 > /dev/null",
1662                                                 GetShuttleTempDir(), localFileName.Data(), fileChecksum.Data()));
1663
1664                         if (md5Comp != 0)
1665                         {
1666                                 Log(detector, Form("GetFileName - md5sum of file %s does not match with local copy!",
1667                                                         filePath.Data()));
1668                                 result = kFALSE;
1669                                 continue;
1670                         }
1671                 } else {
1672                         Log(fCurrentDetector, Form("GetFile - md5sum of file %s not set in %s database, skipping comparison",
1673                                                         filePath.Data(), GetSystemName(system)));
1674                 }
1675                 if (result) break;
1676         }
1677
1678         if(!result) return 0;
1679
1680         fFXSCalled[system]=kTRUE;
1681         TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, sourceName.Data()));
1682         fFXSlist[system].Add(fileParams);
1683
1684         static TString fullLocalFileName;
1685         fullLocalFileName = TString::Format("%s/%s", GetShuttleTempDir(), localFileName.Data());
1686
1687         AliInfo(Form("fullLocalFileName = %s", fullLocalFileName.Data()));
1688
1689         return fullLocalFileName.Data();
1690
1691 }
1692
1693 //______________________________________________________________________________________________
1694 Bool_t AliShuttle::RetrieveFile(UInt_t system, const char* fxsFileName, const char* localFileName)
1695 {
1696         //
1697         // Copies file from FXS to local Shuttle machine
1698         //
1699
1700         // check temp directory: trying to cd to temp; if it does not exist, create it
1701         AliDebug(2, Form("Copy file %s from %s FXS into %s/%s",
1702                         GetSystemName(system), fxsFileName, GetShuttleTempDir(), localFileName));
1703
1704         void* dir = gSystem->OpenDirectory(GetShuttleTempDir());
1705         if (dir == NULL) {
1706                 if (gSystem->mkdir(GetShuttleTempDir(), kTRUE)) {
1707                         AliError(Form("Can't open directory <%s>", GetShuttleTempDir()));
1708                         return kFALSE;
1709                 }
1710
1711         } else {
1712                 gSystem->FreeDirectory(dir);
1713         }
1714
1715         TString baseFXSFolder;
1716         if (system == kDAQ)
1717         {
1718                 baseFXSFolder = "FES/";
1719         }
1720         else if (system == kDCS)
1721         {
1722                 baseFXSFolder = "";
1723         }
1724         else if (system == kHLT)
1725         {
1726                 baseFXSFolder = "~/";
1727         }
1728
1729
1730         TString command = Form("scp -oPort=%d -2 %s@%s:%s%s %s/%s",
1731                 fConfig->GetFXSPort(system),
1732                 fConfig->GetFXSUser(system),
1733                 fConfig->GetFXSHost(system),
1734                 baseFXSFolder.Data(),
1735                 fxsFileName,
1736                 GetShuttleTempDir(),
1737                 localFileName);
1738
1739         AliDebug(2, Form("%s",command.Data()));
1740
1741         Bool_t result = (gSystem->Exec(command.Data()) == 0);
1742
1743         return result;
1744 }
1745
1746 //______________________________________________________________________________________________
1747 TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
1748 {
1749         //
1750         // Get sources producing the condition file Id from file exchange servers
1751         //
1752         
1753         // check if test mode should simulate a FXS error
1754         if (fTestMode & kErrorFXSSources)
1755         {
1756                 Log(detector, Form("GetFileSources - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
1757                 return 0;
1758         }
1759
1760
1761         if (system == kDCS)
1762         {
1763                 AliError("DCS system has only one source of data!");
1764                 return NULL;
1765         }
1766
1767         // check connection, in case connect
1768         if (!Connect(system))
1769         {
1770                 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
1771                 return NULL;
1772         }
1773
1774         TString sourceName = 0;
1775         if (system == kDAQ)
1776         {
1777                 sourceName = "DAQsource";
1778         } else if (system == kHLT)
1779         {
1780                 sourceName = "DDLnumbers";
1781         }
1782
1783         TString sqlQueryStart = Form("select %s from %s where", sourceName.Data(), fConfig->GetFXSdbTable(system));
1784         TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1785                                 GetCurrentRun(), detector, id);
1786         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1787
1788         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1789
1790         // Query execution
1791         TSQLResult* aResult;
1792         aResult = fServer[system]->Query(sqlQuery);
1793         if (!aResult) {
1794                 Log(detector, Form("GetFileSources - Can't execute SQL query to %s database for id: %s",
1795                                 GetSystemName(system), id));
1796                 return 0;
1797         }
1798
1799         if (aResult->GetRowCount() == 0)
1800         {
1801                 Log(detector,
1802                         Form("GetFileSources - No entry in %s FXS table for id: %s", GetSystemName(system), id));
1803                 delete aResult;
1804                 return 0;
1805         }
1806
1807         TSQLRow* aRow;
1808         TList *list = new TList();
1809         list->SetOwner(1);
1810
1811         while ((aRow = aResult->Next()))
1812         {
1813
1814                 TString source(aRow->GetField(0), aRow->GetFieldLength(0));
1815                 AliDebug(2, Form("%s = %s", sourceName.Data(), source.Data()));
1816                 list->Add(new TObjString(source));
1817                 delete aRow;
1818         }
1819
1820         delete aResult;
1821
1822         return list;
1823 }
1824
1825 //______________________________________________________________________________________________
1826 Bool_t AliShuttle::Connect(Int_t system)
1827 {
1828         // Connect to MySQL Server of the system's FXS MySQL databases
1829         // DAQ Logbook, Shuttle Logbook and DAQ FXS db are on the same host
1830         //
1831
1832         // check connection: if already connected return
1833         if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
1834
1835         TString dbHost, dbUser, dbPass, dbName;
1836
1837         if (system < 3) // FXS db servers
1838         {
1839                 dbHost = Form("mysql://%s:%d", fConfig->GetFXSdbHost(system), fConfig->GetFXSdbPort(system));
1840                 dbUser = fConfig->GetFXSdbUser(system);
1841                 dbPass = fConfig->GetFXSdbPass(system);
1842                 dbName =   fConfig->GetFXSdbName(system);
1843         } else { // Run & Shuttle logbook servers
1844         // TODO Will the Shuttle logbook server be the same as the Run logbook server ???
1845                 dbHost = Form("mysql://%s:%d", fConfig->GetDAQlbHost(), fConfig->GetDAQlbPort());
1846                 dbUser = fConfig->GetDAQlbUser();
1847                 dbPass = fConfig->GetDAQlbPass();
1848                 dbName =   fConfig->GetDAQlbDB();
1849         }
1850
1851         fServer[system] = TSQLServer::Connect(dbHost.Data(), dbUser.Data(), dbPass.Data());
1852         if (!fServer[system] || !fServer[system]->IsConnected()) {
1853                 if(system < 3)
1854                 {
1855                 AliError(Form("Can't establish connection to FXS database for %s",
1856                                         AliShuttleInterface::GetSystemName(system)));
1857                 } else {
1858                 AliError("Can't establish connection to Run logbook.");
1859                 }
1860                 if(fServer[system]) delete fServer[system];
1861                 return kFALSE;
1862         }
1863
1864         // Get tables
1865         TSQLResult* aResult=0;
1866         switch(system){
1867                 case kDAQ:
1868                         aResult = fServer[kDAQ]->GetTables(dbName.Data());
1869                         break;
1870                 case kDCS:
1871                         aResult = fServer[kDCS]->GetTables(dbName.Data());
1872                         break;
1873                 case kHLT:
1874                         aResult = fServer[kHLT]->GetTables(dbName.Data());
1875                         break;
1876                 default:
1877                         aResult = fServer[3]->GetTables(dbName.Data());
1878                         break;
1879         }
1880
1881         delete aResult;
1882         return kTRUE;
1883 }
1884
1885 //______________________________________________________________________________________________
1886 Bool_t AliShuttle::UpdateTable()
1887 {
1888         //
1889         // Update FXS table filling time_processed field in all rows corresponding to current run and detector
1890         //
1891
1892         Bool_t result = kTRUE;
1893
1894         for (UInt_t system=0; system<3; system++)
1895         {
1896                 if(!fFXSCalled[system]) continue;
1897
1898                 // check connection, in case connect
1899                 if (!Connect(system))
1900                 {
1901                         Log(fCurrentDetector, Form("UpdateTable - Couldn't connect to %s FXS database", GetSystemName(system)));
1902                         result = kFALSE;
1903                         continue;
1904                 }
1905
1906                 TTimeStamp now; // now
1907
1908                 // Loop on FXS list entries
1909                 TIter iter(&fFXSlist[system]);
1910                 TObjString *aFXSentry=0;
1911                 while ((aFXSentry = dynamic_cast<TObjString*> (iter.Next())))
1912                 {
1913                         TString aFXSentrystr = aFXSentry->String();
1914                         TObjArray *aFXSarray = aFXSentrystr.Tokenize("#!?!#");
1915                         if (!aFXSarray || aFXSarray->GetEntries() != 2 )
1916                         {
1917                                 Log(fCurrentDetector, Form("UpdateTable - error updating %s FXS entry. Check string: <%s>",
1918                                         GetSystemName(system), aFXSentrystr.Data()));
1919                                 if(aFXSarray) delete aFXSarray;
1920                                 result = kFALSE;
1921                                 continue;
1922                         }
1923                         const char* fileId = ((TObjString*) aFXSarray->At(0))->GetName();
1924                         const char* source = ((TObjString*) aFXSarray->At(1))->GetName();
1925
1926                         TString whereClause;
1927                         if (system == kDAQ)
1928                         {
1929                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
1930                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
1931                         }
1932                         else if (system == kDCS)
1933                         {
1934                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\";",
1935                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId);
1936                         }
1937                         else if (system == kHLT)
1938                         {
1939                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\";",
1940                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
1941                         }
1942
1943                         delete aFXSarray;
1944
1945                         TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
1946                                                                 now.GetSec(), whereClause.Data());
1947
1948                         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1949
1950                         // Query execution
1951                         TSQLResult* aResult;
1952                         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
1953                         if (!aResult)
1954                         {
1955                                 Log(fCurrentDetector, Form("UpdateTable - %s db: can't execute SQL query <%s>",
1956                                                                 GetSystemName(system), sqlQuery.Data()));
1957                                 result = kFALSE;
1958                                 continue;
1959                         }
1960                         delete aResult;
1961                 }
1962         }
1963
1964         return result;
1965 }
1966
1967 //______________________________________________________________________________________________
1968 Bool_t AliShuttle::UpdateTableFailCase()
1969 {
1970         // Update FXS table filling time_processed field in all rows corresponding to current run and detector
1971         // this is called in case the preprocessor is declared failed for the current run, because
1972         // the fields are updated only in case of success
1973
1974         Bool_t result = kTRUE;
1975
1976         for (UInt_t system=0; system<3; system++)
1977         {
1978                 // check connection, in case connect
1979                 if (!Connect(system))
1980                 {
1981                         Log(fCurrentDetector, Form("UpdateTableFailCase - Couldn't connect to %s FXS database",
1982                                                         GetSystemName(system)));
1983                         result = kFALSE;
1984                         continue;
1985                 }
1986
1987                 TTimeStamp now; // now
1988
1989                 // Loop on FXS list entries
1990
1991                 TString whereClause = Form("where run=%d and detector=\"%s\";",
1992                                                 GetCurrentRun(), fCurrentDetector.Data());
1993
1994
1995                 TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
1996                                                         now.GetSec(), whereClause.Data());
1997
1998                 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1999
2000                 // Query execution
2001                 TSQLResult* aResult;
2002                 aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2003                 if (!aResult)
2004                 {
2005                         Log(fCurrentDetector, Form("UpdateTableFailCase - %s db: can't execute SQL query <%s>",
2006                                                         GetSystemName(system), sqlQuery.Data()));
2007                         result = kFALSE;
2008                         continue;
2009                 }
2010                 delete aResult;
2011         }
2012
2013         return result;
2014 }
2015
2016 //______________________________________________________________________________________________
2017 Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
2018 {
2019         //
2020         // Update Shuttle logbook filling detector or shuttle_done column
2021         // ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
2022         //
2023
2024         // check connection, in case connect
2025         if(!Connect(3)){
2026                 Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook.");
2027                 return kFALSE;
2028         }
2029
2030         TString detName(detector);
2031         TString setClause;
2032         if(detName == "shuttle_done")
2033         {
2034                 setClause = "set shuttle_done=1";
2035
2036                 // Send the information to ML
2037                 TMonaLisaText  mlStatus("SHUTTLE_status", "Done");
2038
2039                 TList mlList;
2040                 mlList.Add(&mlStatus);
2041
2042                 fMonaLisa->SendParameters(&mlList);
2043         } else {
2044                 TString statusStr(status);
2045                 if(statusStr.Contains("done", TString::kIgnoreCase) ||
2046                    statusStr.Contains("failed", TString::kIgnoreCase)){
2047                         setClause = Form("set %s=\"%s\"", detector, status);
2048                 } else {
2049                         Log("SHUTTLE",
2050                                 Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s",
2051                                         status, detector));
2052                         return kFALSE;
2053                 }
2054         }
2055
2056         TString whereClause = Form("where run=%d", GetCurrentRun());
2057
2058         TString sqlQuery = Form("update %s %s %s",
2059                                         fConfig->GetShuttlelbTable(), setClause.Data(), whereClause.Data());
2060
2061         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2062
2063         // Query execution
2064         TSQLResult* aResult;
2065         aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
2066         if (!aResult) {
2067                 Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data()));
2068                 return kFALSE;
2069         }
2070         delete aResult;
2071
2072         return kTRUE;
2073 }
2074
2075 //______________________________________________________________________________________________
2076 Int_t AliShuttle::GetCurrentRun() const
2077 {
2078         //
2079         // Get current run from logbook entry
2080         //
2081
2082         return fLogbookEntry ? fLogbookEntry->GetRun() : -1;
2083 }
2084
2085 //______________________________________________________________________________________________
2086 UInt_t AliShuttle::GetCurrentStartTime() const
2087 {
2088         //
2089         // get current start time
2090         //
2091
2092         return fLogbookEntry ? fLogbookEntry->GetStartTime() : 0;
2093 }
2094
2095 //______________________________________________________________________________________________
2096 UInt_t AliShuttle::GetCurrentEndTime() const
2097 {
2098         //
2099         // get current end time from logbook entry
2100         //
2101
2102         return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
2103 }
2104
2105 //______________________________________________________________________________________________
2106 void AliShuttle::Log(const char* detector, const char* message)
2107 {
2108         //
2109         // Fill log string with a message
2110         //
2111
2112         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
2113         if (dir == NULL) {
2114                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE)) {
2115                         AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
2116                         return;
2117                 }
2118
2119         } else {
2120                 gSystem->FreeDirectory(dir);
2121         }
2122
2123         TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
2124         if (GetCurrentRun() >= 0) 
2125                 toLog += Form("run %d - ", GetCurrentRun());
2126         toLog += Form("%s", message);
2127
2128         AliInfo(toLog.Data());
2129
2130         TString fileName;
2131         if (GetCurrentRun() >= 0) 
2132                 fileName.Form("%s/%s_%d.log", GetShuttleLogDir(), detector, GetCurrentRun());
2133         else
2134                 fileName.Form("%s/%s.log", GetShuttleLogDir(), detector);
2135         
2136         gSystem->ExpandPathName(fileName);
2137
2138         ofstream logFile;
2139         logFile.open(fileName, ofstream::out | ofstream::app);
2140
2141         if (!logFile.is_open()) {
2142                 AliError(Form("Could not open file %s", fileName.Data()));
2143                 return;
2144         }
2145
2146         logFile << toLog.Data() << "\n";
2147
2148         logFile.close();
2149 }
2150
2151 //______________________________________________________________________________________________
2152 Bool_t AliShuttle::Collect(Int_t run)
2153 {
2154         //
2155         // Collects conditions data for all UNPROCESSED run written to DAQ LogBook in case of run = -1 (default)
2156         // If a dedicated run is given this run is processed
2157         //
2158         // In operational mode, this is the Shuttle function triggered by the EOR signal.
2159         //
2160
2161         if (run == -1)
2162                 Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs");
2163         else
2164                 Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run));
2165
2166         SetLastAction("Starting");
2167
2168         TString whereClause("where shuttle_done=0");
2169         if (run != -1)
2170                 whereClause += Form(" and run=%d", run);
2171
2172         TObjArray shuttleLogbookEntries;
2173         if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries))
2174         {
2175                 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
2176                 return kFALSE;
2177         }
2178
2179         if (shuttleLogbookEntries.GetEntries() == 0)
2180         {
2181                 if (run == -1)
2182                         Log("SHUTTLE","Collect - Found no UNPROCESSED runs in Shuttle logbook");
2183                 else
2184                         Log("SHUTTLE", Form("Collect - Run %d is already DONE "
2185                                                 "or it does not exist in Shuttle logbook", run));
2186                 return kTRUE;
2187         }
2188
2189         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
2190                 fFirstUnprocessed[iDet] = kTRUE;
2191
2192         if (run != -1)
2193         {
2194                 // query Shuttle logbook for earlier runs, check if some detectors are unprocessed,
2195                 // flag them into fFirstUnprocessed array
2196                 TString whereClause(Form("where shuttle_done=0 and run < %d", run));
2197                 TObjArray tmpLogbookEntries;
2198                 if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries))
2199                 {
2200                         Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
2201                         return kFALSE;
2202                 }
2203
2204                 TIter iter(&tmpLogbookEntries);
2205                 AliShuttleLogbookEntry* anEntry = 0;
2206                 while ((anEntry = dynamic_cast<AliShuttleLogbookEntry*> (iter.Next())))
2207                 {
2208                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
2209                         {
2210                                 if (anEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
2211                                 {
2212                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
2213                                                         anEntry->GetRun(), GetDetName(iDet)));
2214                                         fFirstUnprocessed[iDet] = kFALSE;
2215                                 }
2216                         }
2217
2218                 }
2219
2220         }
2221
2222         if (!RetrieveConditionsData(shuttleLogbookEntries))
2223         {
2224                 Log("SHUTTLE", "Collect - Process of at least one run failed");
2225                 return kFALSE;
2226         }
2227
2228         Log("SHUTTLE", "Collect - Requested run(s) successfully processed");
2229         return kTRUE;
2230 }
2231
2232 //______________________________________________________________________________________________
2233 Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries)
2234 {
2235         //
2236         // Retrieve conditions data for all runs that aren't processed yet
2237         //
2238
2239         Bool_t hasError = kFALSE;
2240
2241         TIter iter(&dateEntries);
2242         AliShuttleLogbookEntry* anEntry;
2243
2244         while ((anEntry = (AliShuttleLogbookEntry*) iter.Next())){
2245                 if (!Process(anEntry)){
2246                         hasError = kTRUE;
2247                 }
2248
2249                 // clean SHUTTLE temp directory
2250                 TString filename = Form("%s/*.shuttle", GetShuttleTempDir());
2251                 RemoveFile(filename.Data());
2252         }
2253
2254         return hasError == kFALSE;
2255 }
2256
2257 //______________________________________________________________________________________________
2258 ULong_t AliShuttle::GetTimeOfLastAction() const
2259 {
2260         //
2261         // Gets time of last action
2262         //
2263
2264         ULong_t tmp;
2265
2266         fMonitoringMutex->Lock();
2267
2268         tmp = fLastActionTime;
2269
2270         fMonitoringMutex->UnLock();
2271
2272         return tmp;
2273 }
2274
2275 //______________________________________________________________________________________________
2276 const TString AliShuttle::GetLastAction() const
2277 {
2278         //
2279         // returns a string description of the last action
2280         //
2281
2282         TString tmp;
2283
2284         fMonitoringMutex->Lock();
2285         
2286         tmp = fLastAction;
2287         
2288         fMonitoringMutex->UnLock();
2289
2290         return tmp;
2291 }
2292
2293 //______________________________________________________________________________________________
2294 void AliShuttle::SetLastAction(const char* action)
2295 {
2296         //
2297         // updates the monitoring variables
2298         //
2299
2300         fMonitoringMutex->Lock();
2301
2302         fLastAction = action;
2303         fLastActionTime = time(0);
2304         
2305         fMonitoringMutex->UnLock();
2306 }
2307
2308 //______________________________________________________________________________________________
2309 const char* AliShuttle::GetRunParameter(const char* param)
2310 {
2311         //
2312         // returns run parameter read from DAQ logbook
2313         //
2314
2315         if(!fLogbookEntry) {
2316                 AliError("No logbook entry!");
2317                 return 0;
2318         }
2319
2320         return fLogbookEntry->GetRunParameter(param);
2321 }
2322
2323 //______________________________________________________________________________________________
2324 AliCDBEntry* AliShuttle::GetFromOCDB(const char* detector, const AliCDBPath& path)
2325 {
2326         //
2327         // returns object from OCDB valid for current run
2328         //
2329
2330         if (fTestMode & kErrorOCDB)
2331         {
2332                 Log(detector, "GetFromOCDB - In TESTMODE - Simulating error with OCDB");
2333                 return 0;
2334         }
2335         
2336         AliCDBStorage *sto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
2337         if (!sto)
2338         {
2339                 Log(detector, "GetFromOCDB - Cannot activate main OCDB for query!");
2340                 return 0;
2341         }
2342
2343         return dynamic_cast<AliCDBEntry*> (sto->Get(path, GetCurrentRun()));
2344 }
2345
2346 //______________________________________________________________________________________________
2347 Bool_t AliShuttle::SendMail()
2348 {
2349         //
2350         // sends a mail to the subdetector expert in case of preprocessor error
2351         //
2352         
2353         if (fTestMode != kNone)
2354                 return kTRUE;
2355
2356         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
2357         if (dir == NULL)
2358         {
2359                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
2360                 {
2361                         AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
2362                         return kFALSE;
2363                 }
2364
2365         } else {
2366                 gSystem->FreeDirectory(dir);
2367         }
2368
2369         TString bodyFileName;
2370         bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
2371         gSystem->ExpandPathName(bodyFileName);
2372
2373         ofstream mailBody;
2374         mailBody.open(bodyFileName, ofstream::out);
2375
2376         if (!mailBody.is_open())
2377         {
2378                 AliError(Form("Could not open mail body file %s", bodyFileName.Data()));
2379                 return kFALSE;
2380         }
2381
2382         TString to="";
2383         TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
2384         TObjString *anExpert=0;
2385         while ((anExpert = (TObjString*) iterExperts.Next()))
2386         {
2387                 to += Form("%s,", anExpert->GetName());
2388         }
2389         to.Remove(to.Length()-1);
2390         AliDebug(2, Form("to: %s",to.Data()));
2391
2392         // TODO this will be removed...
2393         if (to.Contains("not_yet_set")) {
2394                 AliInfo("List of detector responsibles not yet set!");
2395                 return kFALSE;
2396         }
2397
2398         TString cc="alberto.colla@cern.ch";
2399
2400         TString subject = Form("%s Shuttle preprocessor error in run %d !",
2401                                 fCurrentDetector.Data(), GetCurrentRun());
2402         AliDebug(2, Form("subject: %s", subject.Data()));
2403
2404         TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
2405         body += Form("SHUTTLE just detected that your preprocessor "
2406                         "exited with ERROR state in run %d!!\n\n", GetCurrentRun());
2407         body += Form("Please check %s status on the web page asap!\n\n", fCurrentDetector.Data());
2408         body += Form("The last 10 lines of %s log file are following:\n\n");
2409
2410         AliDebug(2, Form("Body begin: %s", body.Data()));
2411
2412         mailBody << body.Data();
2413         mailBody.close();
2414         mailBody.open(bodyFileName, ofstream::out | ofstream::app);
2415
2416         TString logFileName = Form("%s/%s_%d.log", GetShuttleLogDir(), fCurrentDetector.Data(), GetCurrentRun());
2417         TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
2418         if (gSystem->Exec(tailCommand.Data()))
2419         {
2420                 mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
2421         }
2422
2423         TString endBody = Form("------------------------------------------------------\n\n");
2424         endBody += Form("In case of problems please contact the SHUTTLE core team.\n\n");
2425         endBody += "Please do not answer this message directly, it is automatically generated.\n\n";
2426         endBody += "Sincerely yours,\n\n \t\t\tthe SHUTTLE\n";
2427
2428         AliDebug(2, Form("Body end: %s", endBody.Data()));
2429
2430         mailBody << endBody.Data();
2431
2432         mailBody.close();
2433
2434         // send mail!
2435         TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
2436                                                 subject.Data(),
2437                                                 cc.Data(),
2438                                                 to.Data(),
2439                                                 bodyFileName.Data());
2440         AliDebug(2, Form("mail command: %s", mailCommand.Data()));
2441
2442         Bool_t result = gSystem->Exec(mailCommand.Data());
2443
2444         return result == 0;
2445 }
2446
2447 //______________________________________________________________________________________________
2448 const char* AliShuttle::GetRunType()
2449 {
2450         //
2451         // returns run type read from "run type" logbook
2452         //
2453
2454         if(!fLogbookEntry) {
2455                 AliError("No logbook entry!");
2456                 return 0;
2457         }
2458
2459         return fLogbookEntry->GetRunType();
2460 }
2461
2462 //______________________________________________________________________________________________
2463 void AliShuttle::SetShuttleTempDir(const char* tmpDir)
2464 {
2465         //
2466         // sets Shuttle temp directory
2467         //
2468
2469         fgkShuttleTempDir = gSystem->ExpandPathName(tmpDir);
2470 }
2471
2472 //______________________________________________________________________________________________
2473 void AliShuttle::SetShuttleLogDir(const char* logDir)
2474 {
2475         //
2476         // sets Shuttle log directory
2477         //
2478
2479         fgkShuttleLogDir = gSystem->ExpandPathName(logDir);
2480 }