]> git.uio.no Git - u/mrichter/AliRoot.git/blob - SHUTTLE/AliShuttle.cxx
Detectors can have more than one AMANDA server. SHUTTLE queries the servers sequentially,
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttle.cxx
1 /**************************************************************************
2  * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
3  *                                                                        *
4  * Author: The ALICE Off-line Project.                                    *
5  * Contributors are mentioned in the code where appropriate.              *
6  *                                                                        *
7  * Permission to use, copy, modify and distribute this software and its   *
8  * documentation strictly for non-commercial purposes is hereby granted   *
9  * without fee, provided that the above copyright notice appears in all   *
10  * copies and that both the copyright notice and this permission notice   *
11  * appear in the supporting documentation. The authors make no claims     *
12  * about the suitability of this software for any purpose. It is          *
13  * provided "as is" without express or implied warranty.                  *
14  **************************************************************************/
15
16 /*
17 $Log$
18 Revision 1.56  2007/09/14 16:46:14  jgrosseo
19 1) Connect and Close are called before and after each query, so one can
20 keep the same AliDCSClient object.
21 2) The splitting of a query is moved to GetDPValues/GetAliasValues.
22 3) Splitting interval can be specified in constructor
23
24 Revision 1.55  2007/08/06 12:26:40  acolla
25 Function Bool_t GetHLTStatus added to preprocessor. It returns the status of HLT
26 read from the run logbook.
27
28 Revision 1.54  2007/07/12 09:51:25  jgrosseo
29 removed duplicated log message in GetFile
30
31 Revision 1.53  2007/07/12 09:26:28  jgrosseo
32 updating hlt fxs base path
33
34 Revision 1.52  2007/07/12 08:06:45  jgrosseo
35 adding log messages in getfile... functions
36 adding not implemented copy constructor in alishuttleconfigholder
37
38 Revision 1.51  2007/07/03 17:24:52  acolla
39 root moved to v5-16-00. TFileMerger->Cp moved to TFile::Cp.
40
41 Revision 1.50  2007/07/02 17:19:32  acolla
42 preprocessor is run in a temp directory that is removed when process is finished.
43
44 Revision 1.49  2007/06/29 10:45:06  acolla
45 Number of columns in MySql Shuttle logbook increased by one (HLT added)
46
47 Revision 1.48  2007/06/21 13:06:19  acolla
48 GetFileSources returns dummy list with 1 source if system=DCS (better than
49 returning error as it was)
50
51 Revision 1.47  2007/06/19 17:28:56  acolla
52 HLT updated; missing map bug removed.
53
54 Revision 1.46  2007/06/09 13:01:09  jgrosseo
55 Switching to retrieval of several DCS DPs at a time (multiDPrequest)
56
57 Revision 1.45  2007/05/30 06:35:20  jgrosseo
58 Adding functionality to the Shuttle/TestShuttle:
59 o) Function to retrieve list of sources from a given system (GetFileSources with id=0)
60 o) Function to retrieve list of IDs for a given source      (GetFileIDs)
61 These functions are needed for dealing with the tag files that are saved for the GRP preprocessor
62 Example code has been added to the TestProcessor in TestShuttle
63
64 Revision 1.44  2007/05/11 16:09:32  acolla
65 Reference files for ITS, MUON and PHOS are now stored in OfflineDetName/OnlineDetName/run_...
66 example: ITS/SPD/100_filename.root
67
68 Revision 1.43  2007/05/10 09:59:51  acolla
69 Various bug fixes in StoreRefFilesToGrid; Cleaning of reference storage before processing detector (CleanReferenceStorage)
70
71 Revision 1.42  2007/05/03 08:01:39  jgrosseo
72 typo in last commit :-(
73
74 Revision 1.41  2007/05/03 08:00:48  jgrosseo
75 fixing log message when pp want to skip dcs value retrieval
76
77 Revision 1.40  2007/04/27 07:06:48  jgrosseo
78 GetFileSources returns empty list in case of no files, but successful query
79 No mails sent in testmode
80
81 Revision 1.39  2007/04/17 12:43:57  acolla
82 Correction in StoreOCDB; change of text in mail to detector expert
83
84 Revision 1.38  2007/04/12 08:26:18  jgrosseo
85 updated comment
86
87 Revision 1.37  2007/04/10 16:53:14  jgrosseo
88 redirecting sub detector stdout, stderr to sub detector log file
89
90 Revision 1.35  2007/04/04 16:26:38  acolla
91 1. Re-organization of function calls in TestPreprocessor to make it more meaningful.
92 2. Added missing dependency in test preprocessors.
93 3. in AliShuttle.cxx: processing time and memory consumption info on a single line.
94
95 Revision 1.34  2007/04/04 10:33:36  jgrosseo
96 1) Storing of files to the Grid is now done _after_ your preprocessors succeeded. This is transparent, which means that you can still use the same functions (Store, StoreReferenceData) to store files to the Grid. However, the Shuttle first stores them locally and transfers them after the preprocessor finished. The return code of these two functions has changed from UInt_t to Bool_t which gives you the success of the storing.
97 In case of an error with the Grid, the Shuttle will retry the storing later, the preprocessor does not need to be run again.
98
99 2) The meaning of the return code of the preprocessor has changed. 0 is now success and any other value means failure. This value is stored in the log and you can use it to keep details about the error condition.
100
101 3) New function StoreReferenceFile to _directly_ store a file (without opening it) to the reference storage.
102
103 4) The memory usage of the preprocessor is monitored. If it exceeds 2 GB it is terminated.
104
105 5) New function AliPreprocessor::ProcessDCS(). If you do not need to have DCS data in all cases, you can skip the processing by implemting this function and returning kFALSE under certain conditions. E.g. if there is a certain run type.
106 If you always need DCS data (like before), you do not need to implement it.
107
108 6) The run type has been added to the monitoring page
109
110 Revision 1.33  2007/04/03 13:56:01  acolla
111 Grid Storage at the end of preprocessing. Added virtual method to disable DCS query according to the
112 run type.
113
114 Revision 1.32  2007/02/28 10:41:56  acolla
115 Run type field added in SHUTTLE framework. Run type is read from "run type" logbook and retrieved by
116 AliPreprocessor::GetRunType() function.
117 Added some ldap definition files.
118
119 Revision 1.30  2007/02/13 11:23:21  acolla
120 Moved getters and setters of Shuttle's main OCDB/Reference, local
121 OCDB/Reference, temp and log folders to AliShuttleInterface
122
123 Revision 1.27  2007/01/30 17:52:42  jgrosseo
124 adding monalisa monitoring
125
126 Revision 1.26  2007/01/23 19:20:03  acolla
127 Removed old ldif files, added TOF, MCH ldif files. Added some options in
128 AliShuttleConfig::Print. Added in Ali Shuttle: SetShuttleTempDir and
129 SetShuttleLogDir
130
131 Revision 1.25  2007/01/15 19:13:52  acolla
132 Moved some AliInfo to AliDebug in SendMail function
133
134 Revision 1.21  2006/12/07 08:51:26  jgrosseo
135 update (alberto):
136 table, db names in ldap configuration
137 added GRP preprocessor
138 DCS data can also be retrieved by data point
139
140 Revision 1.20  2006/11/16 16:16:48  jgrosseo
141 introducing strict run ordering flag
142 removed giving preprocessor name to preprocessor, they have to know their name themselves ;-)
143
144 Revision 1.19  2006/11/06 14:23:04  jgrosseo
145 major update (Alberto)
146 o) reading of run parameters from the logbook
147 o) online offline naming conversion
148 o) standalone DCSclient package
149
150 Revision 1.18  2006/10/20 15:22:59  jgrosseo
151 o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
152 o) Merging Collect, CollectAll, CollectNew function
153 o) Removing implementation of empty copy constructors (declaration still there!)
154
155 Revision 1.17  2006/10/05 16:20:55  jgrosseo
156 adapting to new CDB classes
157
158 Revision 1.16  2006/10/05 15:46:26  jgrosseo
159 applying to the new interface
160
161 Revision 1.15  2006/10/02 16:38:39  jgrosseo
162 update (alberto):
163 fixed memory leaks
164 storing of objects that failed to be stored to the grid before
165 interfacing of shuttle status table in daq system
166
167 Revision 1.14  2006/08/29 09:16:05  jgrosseo
168 small update
169
170 Revision 1.13  2006/08/15 10:50:00  jgrosseo
171 effc++ corrections (alberto)
172
173 Revision 1.12  2006/08/08 14:19:29  jgrosseo
174 Update to shuttle classes (Alberto)
175
176 - Possibility to set the full object's path in the Preprocessor's and
177 Shuttle's  Store functions
178 - Possibility to extend the object's run validity in the same classes
179 ("startValidity" and "validityInfinite" parameters)
180 - Implementation of the StoreReferenceData function to store reference
181 data in a dedicated CDB storage.
182
183 Revision 1.11  2006/07/21 07:37:20  jgrosseo
184 last run is stored after each run
185
186 Revision 1.10  2006/07/20 09:54:40  jgrosseo
187 introducing status management: The processing per subdetector is divided into several steps,
188 after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
189 can keep track of the number of failures and skips further processing after a certain threshold is
190 exceeded. These thresholds can be configured in LDAP.
191
192 Revision 1.9  2006/07/19 10:09:55  jgrosseo
193 new configuration, accesst to DAQ FES (Alberto)
194
195 Revision 1.8  2006/07/11 12:44:36  jgrosseo
196 adding parameters for extended validity range of data produced by preprocessor
197
198 Revision 1.7  2006/07/10 14:37:09  jgrosseo
199 small fix + todo comment
200
201 Revision 1.6  2006/07/10 13:01:41  jgrosseo
202 enhanced storing of last sucessfully processed run (alberto)
203
204 Revision 1.5  2006/07/04 14:59:57  jgrosseo
205 revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
206
207 Revision 1.4  2006/06/12 09:11:16  jgrosseo
208 coding conventions (Alberto)
209
210 Revision 1.3  2006/06/06 14:26:40  jgrosseo
211 o) removed files that were moved to STEER
212 o) shuttle updated to follow the new interface (Alberto)
213
214 Revision 1.2  2006/03/07 07:52:34  hristov
215 New version (B.Yordanov)
216
217 Revision 1.6  2005/11/19 17:19:14  byordano
218 RetrieveDATEEntries and RetrieveConditionsData added
219
220 Revision 1.5  2005/11/19 11:09:27  byordano
221 AliShuttle declaration added
222
223 Revision 1.4  2005/11/17 17:47:34  byordano
224 TList changed to TObjArray
225
226 Revision 1.3  2005/11/17 14:43:23  byordano
227 import to local CVS
228
229 Revision 1.1.1.1  2005/10/28 07:33:58  hristov
230 Initial import as subdirectory in AliRoot
231
232 Revision 1.2  2005/09/13 08:41:15  byordano
233 default startTime endTime added
234
235 Revision 1.4  2005/08/30 09:13:02  byordano
236 some docs added
237
238 Revision 1.3  2005/08/29 21:15:47  byordano
239 some docs added
240
241 */
242
243 //
244 // This class is the main manager for AliShuttle.
245 // It organizes the data retrieval from DCS and calls the
246 // interface methods of AliPreprocessor.
247 // For every detector in AliShuttleConfig (see AliShuttleConfig),
248 // data for its set of aliases is retrieved. If there is a registered
249 // AliPreprocessor for this detector then it will be used
250 // according to the schema (see AliPreprocessor).
251 // If there isn't a registered AliPreprocessor then the retrieved
252 // data is stored automatically to the underlying AliCDBStorage.
253 // The alias name is used for detSpec.
254 //
255
256 #include "AliShuttle.h"
257
258 #include "AliCDBManager.h"
259 #include "AliCDBStorage.h"
260 #include "AliCDBId.h"
261 #include "AliCDBRunRange.h"
262 #include "AliCDBPath.h"
263 #include "AliCDBEntry.h"
264 #include "AliShuttleConfig.h"
265 #include "DCSClient/AliDCSClient.h"
266 #include "AliLog.h"
267 #include "AliPreprocessor.h"
268 #include "AliShuttleStatus.h"
269 #include "AliShuttleLogbookEntry.h"
270
271 #include <TSystem.h>
272 #include <TObject.h>
273 #include <TString.h>
274 #include <TTimeStamp.h>
275 #include <TObjString.h>
276 #include <TSQLServer.h>
277 #include <TSQLResult.h>
278 #include <TSQLRow.h>
279 #include <TMutex.h>
280 #include <TSystemDirectory.h>
281 #include <TSystemFile.h>
282 #include <TFile.h>
283 #include <TGrid.h>
284 #include <TGridResult.h>
285
286 #include <TMonaLisaWriter.h>
287
288 #include <fstream>
289
290 #include <sys/types.h>
291 #include <sys/wait.h>
292
293 ClassImp(AliShuttle)
294
295 //______________________________________________________________________________________________
296 AliShuttle::AliShuttle(const AliShuttleConfig* config,
297                 UInt_t timeout, Int_t retries):
298 fConfig(config),
299 fTimeout(timeout), fRetries(retries),
300 fPreprocessorMap(),
301 fLogbookEntry(0),
302 fCurrentDetector(),
303 fStatusEntry(0),
304 fMonitoringMutex(0),
305 fLastActionTime(0),
306 fLastAction(),
307 fMonaLisa(0),
308 fTestMode(kNone),
309 fReadTestMode(kFALSE),
310 fOutputRedirected(kFALSE)
311 {
312         //
313         // config: AliShuttleConfig used
314         // timeout: timeout used for AliDCSClient connection
315         // retries: the number of retries in case of connection error.
316         //
317
318         if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
319         for(int iSys=0;iSys<4;iSys++) {
320                 fServer[iSys]=0;
321                 if (iSys < 3)
322                         fFXSlist[iSys].SetOwner(kTRUE);
323         }
324         fPreprocessorMap.SetOwner(kTRUE);
325
326         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
327                 fFirstUnprocessed[iDet] = kFALSE;
328
329         fMonitoringMutex = new TMutex();
330 }
331
332 //______________________________________________________________________________________________
333 AliShuttle::~AliShuttle()
334 {
335         //
336         // destructor
337         //
338
339         fPreprocessorMap.DeleteAll();
340         for(int iSys=0;iSys<4;iSys++)
341                 if(fServer[iSys]) {
342                         fServer[iSys]->Close();
343                         delete fServer[iSys];
344                         fServer[iSys] = 0;
345                 }
346
347         if (fStatusEntry){
348                 delete fStatusEntry;
349                 fStatusEntry = 0;
350         }
351         
352         if (fMonitoringMutex) 
353         {
354                 delete fMonitoringMutex;
355                 fMonitoringMutex = 0;
356         }
357 }
358
359 //______________________________________________________________________________________________
360 void AliShuttle::RegisterPreprocessor(AliPreprocessor* preprocessor)
361 {
362         //
363         // Registers new AliPreprocessor.
364         // It uses GetName() for indentificator of the pre processor.
365         // The pre processor is registered it there isn't any other
366         // with the same identificator (GetName()).
367         //
368
369         const char* detName = preprocessor->GetName();
370         if(GetDetPos(detName) < 0)
371                 AliFatal(Form("********** !!!!! Invalid detector name: %s !!!!! **********", detName));
372
373         if (fPreprocessorMap.GetValue(detName)) {
374                 AliWarning(Form("AliPreprocessor %s is already registered!", detName));
375                 return;
376         }
377
378         fPreprocessorMap.Add(new TObjString(detName), preprocessor);
379 }
380 //______________________________________________________________________________________________
381 Bool_t AliShuttle::Store(const AliCDBPath& path, TObject* object,
382                 AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite)
383 {
384         // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for
385         // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored
386         // using this function. Use StoreReferenceData instead!
387         // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
388         // finishes the data are transferred to the main storage (Grid).
389
390         return StoreLocally(fgkLocalCDB, path, object, metaData, validityStart, validityInfinite);
391 }
392
393 //______________________________________________________________________________________________
394 Bool_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData)
395 {
396         // Stores a CDB object in the storage for reference data. This objects will not be available during
397         // offline reconstrunction. Use this function for reference data only!
398         // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
399         // finishes the data are transferred to the main storage (Grid).
400
401         return StoreLocally(fgkLocalRefStorage, path, object, metaData);
402 }
403
404 //______________________________________________________________________________________________
405 Bool_t AliShuttle::StoreLocally(const TString& localUri,
406                         const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
407                         Int_t validityStart, Bool_t validityInfinite)
408 {
409         // Store object temporarily in local storage. Parameters are passed by Store and StoreReferenceData functions.
410         // when the preprocessor finishes the data are transferred to the main storage (Grid).
411         // The parameters are:
412         //   1) Uri of the backup storage (Local)
413         //   2) the object's path.
414         //   3) the object to be stored
415         //   4) the metaData to be associated with the object
416         //   5) the validity start run number w.r.t. the current run,
417         //      if the data is valid only for this run leave the default 0
418         //   6) specifies if the calibration data is valid for infinity (this means until updated),
419         //      typical for calibration runs, the default is kFALSE
420         //
421         // returns 0 if fail, 1 otherwise
422
423         if (fTestMode & kErrorStorage)
424         {
425                 Log(fCurrentDetector, "StoreLocally - In TESTMODE - Simulating error while storing locally");
426                 return kFALSE;
427         }
428         
429         const char* cdbType = (localUri == fgkLocalCDB) ? "CDB" : "Reference";
430
431         Int_t firstRun = GetCurrentRun() - validityStart;
432         if(firstRun < 0) {
433                 AliWarning("First valid run happens to be less than 0! Setting it to 0.");
434                 firstRun=0;
435         }
436
437         Int_t lastRun = -1;
438         if(validityInfinite) {
439                 lastRun = AliCDBRunRange::Infinity();
440         } else {
441                 lastRun = GetCurrentRun();
442         }
443
444         // Version is set to current run, it will be used later to transfer data to Grid
445         AliCDBId id(path, firstRun, lastRun, GetCurrentRun(), -1);
446
447         if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){
448                 TObjString runUsed = Form("%d", GetCurrentRun());
449                 metaData->SetProperty("RunUsed(TObjString)", runUsed.Clone());
450         }
451
452         Bool_t result = kFALSE;
453
454         if (!(AliCDBManager::Instance()->GetStorage(localUri))) {
455                 Log("SHUTTLE", Form("StoreLocally - Cannot activate local %s storage", cdbType));
456         } else {
457                 result = AliCDBManager::Instance()->GetStorage(localUri)
458                                         ->Put(object, id, metaData);
459         }
460
461         if(!result) {
462
463                 Log(fCurrentDetector, Form("StoreLocally - Can't store object <%s>!", id.ToString().Data()));
464         }
465
466         return result;
467 }
468
469 //______________________________________________________________________________________________
470 Bool_t AliShuttle::StoreOCDB()
471 {
472         //
473         // Called when preprocessor ends successfully or when previous storage attempt failed (kStoreError status)
474         // Calls underlying StoreOCDB(const char*) function twice, for OCDB and Reference storage.
475         // Then calls StoreRefFilesToGrid to store reference files. 
476         //
477         
478         if (fTestMode & kErrorGrid)
479         {
480                 Log("SHUTTLE", "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
481                 Log(fCurrentDetector, "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
482                 return kFALSE;
483         }
484         
485         Log("SHUTTLE","Storing OCDB data ...");
486         Bool_t resultCDB = StoreOCDB(fgkMainCDB);
487
488         Log("SHUTTLE","Storing reference data ...");
489         Bool_t resultRef = StoreOCDB(fgkMainRefStorage);
490         
491         Log("SHUTTLE","Storing reference files ...");
492         Bool_t resultRefFiles = StoreRefFilesToGrid();
493         
494         return resultCDB && resultRef && resultRefFiles;
495 }
496
497 //______________________________________________________________________________________________
498 Bool_t AliShuttle::StoreOCDB(const TString& gridURI)
499 {
500         //
501         // Called by StoreOCDB(), performs actual storage to the main OCDB and reference storages (Grid)
502         //
503
504         TObjArray* gridIds=0;
505
506         Bool_t result = kTRUE;
507
508         const char* type = 0;
509         TString localURI;
510         if(gridURI == fgkMainCDB) {
511                 type = "OCDB";
512                 localURI = fgkLocalCDB;
513         } else if(gridURI == fgkMainRefStorage) {
514                 type = "reference";
515                 localURI = fgkLocalRefStorage;
516         } else {
517                 AliError(Form("Invalid storage URI: %s", gridURI.Data()));
518                 return kFALSE;
519         }
520
521         AliCDBManager* man = AliCDBManager::Instance();
522
523         AliCDBStorage *gridSto = man->GetStorage(gridURI);
524         if(!gridSto) {
525                 Log("SHUTTLE",
526                         Form("StoreOCDB - cannot activate main %s storage", type));
527                 return kFALSE;
528         }
529
530         gridIds = gridSto->GetQueryCDBList();
531
532         // get objects previously stored in local CDB
533         AliCDBStorage *localSto = man->GetStorage(localURI);
534         if(!localSto) {
535                 Log("SHUTTLE",
536                         Form("StoreOCDB - cannot activate local %s storage", type));
537                 return kFALSE;
538         }
539         AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*");
540         // Local objects were stored with current run as Grid version!
541         TList* localEntries = localSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun());
542         localEntries->SetOwner(1);
543
544         // loop on local stored objects
545         TIter localIter(localEntries);
546         AliCDBEntry *aLocEntry = 0;
547         while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){
548                 aLocEntry->SetOwner(1);
549                 AliCDBId aLocId = aLocEntry->GetId();
550                 aLocEntry->SetVersion(-1);
551                 aLocEntry->SetSubVersion(-1);
552
553                 // If local object is valid up to infinity we store it only if it is
554                 // the first unprocessed run!
555                 if (aLocId.GetLastRun() == AliCDBRunRange::Infinity() &&
556                         !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
557                 {
558                         Log("SHUTTLE", Form("StoreOCDB - %s: object %s has validity infinite but "
559                                                 "there are previous unprocessed runs!",
560                                                 fCurrentDetector.Data(), aLocId.GetPath().Data()));
561                         continue;
562                 }
563
564                 // loop on Grid valid Id's
565                 Bool_t store = kTRUE;
566                 TIter gridIter(gridIds);
567                 AliCDBId* aGridId = 0;
568                 while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
569                         if(aGridId->GetPath() != aLocId.GetPath()) continue;
570                         // skip all objects valid up to infinity
571                         if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
572                         // if we get here, it means there's already some more recent object stored on Grid!
573                         store = kFALSE;
574                         break;
575                 }
576
577                 // If we get here, the file can be stored!
578                 Bool_t storeOk = gridSto->Put(aLocEntry);
579                 if(!store || storeOk){
580
581                         if (!store)
582                         {
583                                 Log(fCurrentDetector.Data(),
584                                         Form("StoreOCDB - A more recent object already exists in %s storage: <%s>",
585                                                 type, aGridId->ToString().Data()));
586                         } else {
587                                 Log("SHUTTLE",
588                                         Form("StoreOCDB - Object <%s> successfully put into %s storage",
589                                                 aLocId.ToString().Data(), type));
590                                 Log(fCurrentDetector.Data(),
591                                         Form("StoreOCDB - Object <%s> successfully put into %s storage",
592                                                 aLocId.ToString().Data(), type));
593                         }
594
595                         // removing local filename...
596                         TString filename;
597                         localSto->IdToFilename(aLocId, filename);
598                         AliInfo(Form("Removing local file %s", filename.Data()));
599                         RemoveFile(filename.Data());
600                         continue;
601                 } else  {
602                         Log("SHUTTLE",
603                                 Form("StoreOCDB - Grid %s storage of object <%s> failed",
604                                         type, aLocId.ToString().Data()));
605                         Log(fCurrentDetector.Data(),
606                                 Form("StoreOCDB - Grid %s storage of object <%s> failed",
607                                         type, aLocId.ToString().Data()));
608                         result = kFALSE;
609                 }
610         }
611         localEntries->Clear();
612
613         return result;
614 }
615
616 //______________________________________________________________________________________________
617 Bool_t AliShuttle::CleanReferenceStorage(const char* detector)
618 {
619         // clears the directory used to store reference files of a given subdetector
620   
621         AliCDBManager* man = AliCDBManager::Instance();
622         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
623         TString localBaseFolder = sto->GetBaseFolder();
624
625         TString targetDir = GetRefFilePrefix(localBaseFolder.Data(), detector);
626         
627         Log("SHUTTLE", Form("Cleaning %s", targetDir.Data()));
628
629         TString begin;
630         begin.Form("%d_", GetCurrentRun());
631         
632         TSystemDirectory* baseDir = new TSystemDirectory("/", targetDir);
633         if (!baseDir)
634                 return kTRUE;
635                 
636         TList* dirList = baseDir->GetListOfFiles();
637         delete baseDir;
638         
639         if (!dirList) return kTRUE;
640                         
641         if (dirList->GetEntries() < 3) 
642         {
643                 delete dirList;
644                 return kTRUE;
645         }
646                                 
647         Int_t nDirs = 0, nDel = 0;
648         TIter dirIter(dirList);
649         TSystemFile* entry = 0;
650
651         Bool_t success = kTRUE;
652         
653         while ((entry = dynamic_cast<TSystemFile*> (dirIter.Next())))
654         {                                       
655                 if (entry->IsDirectory())
656                         continue;
657                 
658                 TString fileName(entry->GetName());
659                 if (!fileName.BeginsWith(begin))
660                         continue;
661                         
662                 nDirs++;
663                                                 
664                 // delete file
665                 Int_t result = gSystem->Unlink(fileName.Data());
666                 
667                 if (result)
668                 {
669                         Log("SHUTTLE", Form("Could not delete file %s!", fileName.Data()));
670                         success = kFALSE;
671                 } else {
672                         nDel++;
673                 }
674         }
675
676         if(nDirs > 0)
677                 Log("SHUTTLE", Form("CleanReferenceStorage - %d (over %d) reference files in folder %s were deleted.", 
678                         nDel, nDirs, targetDir.Data()));
679
680                 
681         delete dirList;
682         return success;
683
684
685
686
687
688
689   Int_t result = gSystem->GetPathInfo(targetDir, 0, (Long64_t*) 0, 0, 0);
690   if (result == 0)
691   {
692     // delete directory
693     result = gSystem->Exec(Form("rm -r %s", targetDir.Data()));
694     if (result != 0)
695     {  
696       Log("SHUTTLE", Form("StoreReferenceFile - Could not clear directory %s", targetDir.Data()));
697       return kFALSE;
698     }
699   }
700
701   result = gSystem->mkdir(targetDir, kTRUE);
702   if (result != 0)
703   {
704     Log("SHUTTLE", Form("StoreReferenceFile - Error creating base directory %s", targetDir.Data()));
705     return kFALSE;
706   }
707         
708   return kTRUE;
709 }
710
711 //______________________________________________________________________________________________
712 Bool_t AliShuttle::StoreReferenceFile(const char* detector, const char* localFile, const char* gridFileName)
713 {
714         //
715         // Stores reference file directly (without opening it). This function stores the file locally.
716         //
717         // The file is stored under the following location: 
718         // <base folder of local reference storage>/<DET>/<RUN#>_<gridFileName>
719         // where <gridFileName> is the second parameter given to the function
720         // 
721         
722         if (fTestMode & kErrorStorage)
723         {
724                 Log(fCurrentDetector, "StoreReferenceFile - In TESTMODE - Simulating error while storing locally");
725                 return kFALSE;
726         }
727         
728         AliCDBManager* man = AliCDBManager::Instance();
729         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
730         
731         TString localBaseFolder = sto->GetBaseFolder();
732         
733         TString targetDir = GetRefFilePrefix(localBaseFolder.Data(), detector); 
734         
735         //try to open folder, if does not exist
736         void* dir = gSystem->OpenDirectory(targetDir.Data());
737         if (dir == NULL) {
738                 if (gSystem->mkdir(targetDir.Data(), kTRUE)) {
739                         Log("SHUTTLE", Form("Can't open directory <%s>", targetDir.Data()));
740                         return kFALSE;
741                 }
742
743         } else {
744                 gSystem->FreeDirectory(dir);
745         }
746
747         TString target;
748         target.Form("%s/%d_%s", targetDir.Data(), GetCurrentRun(), gridFileName);
749         
750         Int_t result = gSystem->GetPathInfo(localFile, 0, (Long64_t*) 0, 0, 0);
751         if (result)
752         {
753                 Log("SHUTTLE", Form("StoreReferenceFile - %s does not exist", localFile));
754                 return kFALSE;
755         }
756
757         result = gSystem->CopyFile(localFile, target);
758
759         if (result == 0)
760         {
761                 Log("SHUTTLE", Form("StoreReferenceFile - File %s stored locally to %s", localFile, target.Data()));
762                 return kTRUE;
763         }
764         else
765         {
766                 Log("SHUTTLE", Form("StoreReferenceFile - Could not store file %s to %s!. Error code = %d", 
767                                 localFile, target.Data(), result));
768                 return kFALSE;
769         }       
770 }
771
772 //______________________________________________________________________________________________
773 Bool_t AliShuttle::StoreRefFilesToGrid()
774 {
775         //
776         // Transfers the reference file to the Grid.
777         //
778         // The files are stored under the following location: 
779         // <base folder of reference storage>/<DET>/<RUN#>_<gridFileName>
780         //
781         
782         AliCDBManager* man = AliCDBManager::Instance();
783         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
784         if (!sto)
785                 return kFALSE;
786         TString localBaseFolder = sto->GetBaseFolder();
787                 
788         TString dir = GetRefFilePrefix(localBaseFolder.Data(), fCurrentDetector.Data());
789                 
790         AliCDBStorage* gridSto = man->GetStorage(fgkMainRefStorage);
791         if (!gridSto)
792                 return kFALSE;
793         
794         TString gridBaseFolder = gridSto->GetBaseFolder();
795
796         TString alienDir = GetRefFilePrefix(gridBaseFolder.Data(), fCurrentDetector.Data());
797         
798         TString begin;
799         begin.Form("%d_", GetCurrentRun());
800         
801         TSystemDirectory* baseDir = new TSystemDirectory("/", dir);
802         if (!baseDir)
803                 return kTRUE;
804                 
805         TList* dirList = baseDir->GetListOfFiles();
806         delete baseDir;
807         
808         if (!dirList) return kTRUE;
809                 
810         if (dirList->GetEntries() < 3) 
811         {
812                 delete dirList;
813                 return kTRUE;
814         }
815                         
816         if (!gGrid)
817         { 
818                 Log("SHUTTLE", "Connection to Grid failed: Cannot continue!");
819                 delete dirList;
820                 return kFALSE;
821         }
822         
823         Int_t nDirs = 0, nTransfer = 0;
824         TIter dirIter(dirList);
825         TSystemFile* entry = 0;
826
827         Bool_t success = kTRUE;
828         Bool_t first = kTRUE;
829         
830         while ((entry = dynamic_cast<TSystemFile*> (dirIter.Next())))
831         {                       
832                 if (entry->IsDirectory())
833                         continue;
834                         
835                 TString fileName(entry->GetName());
836                 if (!fileName.BeginsWith(begin))
837                         continue;
838                         
839                 nDirs++;
840                         
841                 if (first)
842                 {
843                         first = kFALSE;
844                         // check that DET folder exists, otherwise create it
845                         TGridResult* result = gGrid->Ls(alienDir.Data(), "a");
846                         
847                         if (!result)
848                         {
849                                 delete dirList;
850                                 return kFALSE;
851                         }
852                         
853                         if (!result->GetFileName(1)) // TODO: It looks like element 0 is always 0!!
854                         {
855                                 if (!gGrid->Mkdir(alienDir.Data(),"",0))
856                                 {
857                                         Log("SHUTTLE", Form("StoreRefFilesToGrid - Cannot create directory %s",
858                                                         alienDir.Data()));
859                                         delete dirList;
860                                         return kFALSE;
861                                 } else {
862                                         Log("SHUTTLE",Form("Folder %s created", alienDir.Data()));
863                                 }
864                                 
865                         } else {
866                                         Log("SHUTTLE",Form("Folder %s found", alienDir.Data()));
867                         }
868                 }
869                         
870                 TString fullLocalPath;
871                 fullLocalPath.Form("%s/%s", dir.Data(), fileName.Data());
872                 
873                 TString fullGridPath;
874                 fullGridPath.Form("alien://%s/%s", alienDir.Data(), fileName.Data());
875
876                 Bool_t result = TFile::Cp(fullLocalPath, fullGridPath);
877                 
878                 if (result)
879                 {
880                         Log("SHUTTLE", Form("StoreRefFilesToGrid - Copying local file %s to %s succeeded!", fullLocalPath.Data(), fullGridPath.Data()));
881                         RemoveFile(fullLocalPath);
882                         nTransfer++;
883                 }
884                 else
885                 {
886                         Log("SHUTTLE", Form("StoreRefFilesToGrid - Copying local file %s to %s FAILED!", fullLocalPath.Data(), fullGridPath.Data()));
887                         success = kFALSE;
888                 }
889         }
890
891         Log("SHUTTLE", Form("StoreRefFilesToGrid - %d (over %d) reference files in folder %s copied to Grid.", nTransfer, nDirs, dir.Data()));
892
893                 
894         delete dirList;
895         return success;
896 }
897
898 //______________________________________________________________________________________________
899 const char* AliShuttle::GetRefFilePrefix(const char* base, const char* detector)
900 {
901         //
902         // Get folder name of reference files 
903         //
904
905         TString offDetStr(GetOfflineDetName(detector));
906         TString dir;
907         if (offDetStr == "ITS" || offDetStr == "MUON" || offDetStr == "PHOS")
908         {
909                 dir.Form("%s/%s/%s", base, offDetStr.Data(), detector);
910         } else {
911                 dir.Form("%s/%s", base, offDetStr.Data());
912         }
913         
914         return dir.Data();
915         
916
917 }
918 //______________________________________________________________________________________________
919 void AliShuttle::CleanLocalStorage(const TString& uri)
920 {
921         //
922         // Called in case the preprocessor is declared failed. Remove remaining objects from the local storages.
923         //
924
925         const char* type = 0;
926         if(uri == fgkLocalCDB) {
927                 type = "OCDB";
928         } else if(uri == fgkLocalRefStorage) {
929                 type = "Reference";
930         } else {
931                 AliError(Form("Invalid storage URI: %s", uri.Data()));
932                 return;
933         }
934
935         AliCDBManager* man = AliCDBManager::Instance();
936
937         // open local storage
938         AliCDBStorage *localSto = man->GetStorage(uri);
939         if(!localSto) {
940                 Log("SHUTTLE",
941                         Form("CleanLocalStorage - cannot activate local %s storage", type));
942                 return;
943         }
944
945         TString filename(Form("%s/%s/*/Run*_v%d_s*.root",
946                 localSto->GetBaseFolder().Data(), GetOfflineDetName(fCurrentDetector.Data()), GetCurrentRun()));
947
948         AliInfo(Form("filename = %s", filename.Data()));
949
950         AliInfo(Form("Removing remaining local files from run %d and detector %s ...",
951                 GetCurrentRun(), fCurrentDetector.Data()));
952
953         RemoveFile(filename.Data());
954
955 }
956
957 //______________________________________________________________________________________________
958 void AliShuttle::RemoveFile(const char* filename)
959 {
960         //
961         // removes local file
962         //
963
964         TString command(Form("rm -f %s", filename));
965
966         Int_t result = gSystem->Exec(command.Data());
967         if(result != 0)
968         {
969                 Log("SHUTTLE", Form("RemoveFile - %s: Cannot remove file %s!",
970                         fCurrentDetector.Data(), filename));
971         }
972 }
973
974 //______________________________________________________________________________________________
975 AliShuttleStatus* AliShuttle::ReadShuttleStatus()
976 {
977         //
978         // Reads the AliShuttleStatus from the CDB
979         //
980
981         if (fStatusEntry){
982                 delete fStatusEntry;
983                 fStatusEntry = 0;
984         }
985
986         fStatusEntry = AliCDBManager::Instance()->GetStorage(GetLocalCDB())
987                 ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
988
989         if (!fStatusEntry) return 0;
990         fStatusEntry->SetOwner(1);
991
992         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
993         if (!status) {
994                 AliError("Invalid object stored to CDB!");
995                 return 0;
996         }
997
998         return status;
999 }
1000
1001 //______________________________________________________________________________________________
1002 Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status)
1003 {
1004         //
1005         // writes the status for one subdetector
1006         //
1007
1008         if (fStatusEntry){
1009                 delete fStatusEntry;
1010                 fStatusEntry = 0;
1011         }
1012
1013         Int_t run = GetCurrentRun();
1014
1015         AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
1016
1017         fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
1018         fStatusEntry->SetOwner(1);
1019
1020         UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
1021
1022         if (!result) {
1023                 Log("SHUTTLE", Form("WriteShuttleStatus - Failed for %s, run %d",
1024                                                 fCurrentDetector.Data(), run));
1025                 return kFALSE;
1026         }
1027         
1028         SendMLInfo();
1029
1030         return kTRUE;
1031 }
1032
1033 //______________________________________________________________________________________________
1034 void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount)
1035 {
1036         //
1037         // changes the AliShuttleStatus for the given detector and run to the given status
1038         //
1039
1040         if (!fStatusEntry){
1041                 AliError("UNEXPECTED: fStatusEntry empty");
1042                 return;
1043         }
1044
1045         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1046
1047         if (!status){
1048                 Log("SHUTTLE", "UNEXPECTED: status could not be read from current CDB entry");
1049                 return;
1050         }
1051
1052         TString actionStr = Form("UpdateShuttleStatus - %s: Changing state from %s to %s",
1053                                 fCurrentDetector.Data(),
1054                                 status->GetStatusName(),
1055                                 status->GetStatusName(newStatus));
1056         Log("SHUTTLE", actionStr);
1057         SetLastAction(actionStr);
1058
1059         status->SetStatus(newStatus);
1060         if (increaseCount) status->IncreaseCount();
1061
1062         AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
1063
1064         SendMLInfo();
1065 }
1066
1067 //______________________________________________________________________________________________
1068 void AliShuttle::SendMLInfo()
1069 {
1070         //
1071         // sends ML information about the current status of the current detector being processed
1072         //
1073         
1074         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1075         
1076         if (!status){
1077                 Log("SHUTTLE", "SendMLInfo - UNEXPECTED: status could not be read from current CDB entry");
1078                 return;
1079         }
1080         
1081         TMonaLisaText  mlStatus(Form("%s_status", fCurrentDetector.Data()), status->GetStatusName());
1082         TMonaLisaValue mlRetryCount(Form("%s_count", fCurrentDetector.Data()), status->GetCount());
1083
1084         TList mlList;
1085         mlList.Add(&mlStatus);
1086         mlList.Add(&mlRetryCount);
1087
1088         fMonaLisa->SendParameters(&mlList);
1089 }
1090
1091 //______________________________________________________________________________________________
1092 Bool_t AliShuttle::ContinueProcessing()
1093 {
1094         // this function reads the AliShuttleStatus information from CDB and
1095         // checks if the processing should be continued
1096         // if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
1097
1098         if (!fConfig->HostProcessDetector(fCurrentDetector)) return kFALSE;
1099
1100         AliPreprocessor* aPreprocessor =
1101                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
1102         if (!aPreprocessor)
1103         {
1104                 AliInfo(Form("%s: no preprocessor registered", fCurrentDetector.Data()));
1105                 return kFALSE;
1106         }
1107
1108         AliShuttleLogbookEntry::Status entryStatus =
1109                 fLogbookEntry->GetDetectorStatus(fCurrentDetector);
1110
1111         if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
1112                 AliInfo(Form("ContinueProcessing - %s is %s",
1113                                 fCurrentDetector.Data(),
1114                                 fLogbookEntry->GetDetectorStatusName(entryStatus)));
1115                 return kFALSE;
1116         }
1117
1118         // if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state
1119
1120         // check if current run is first unprocessed run for current detector
1121         if (fConfig->StrictRunOrder(fCurrentDetector) &&
1122                 !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
1123         {
1124                 if (fTestMode == kNone)
1125                 {
1126                         Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering but this is not the first unprocessed run!"));
1127                         return kFALSE;
1128                 }
1129                 else
1130                 {
1131                         Log("SHUTTLE", Form("ContinueProcessing - In TESTMODE - Although %s requires strict run ordering and this is not the first unprocessed run, the SHUTTLE continues"));
1132                 }
1133         }
1134
1135         AliShuttleStatus* status = ReadShuttleStatus();
1136         if (!status) {
1137                 // first time
1138                 Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
1139                                 fCurrentDetector.Data()));
1140                 status = new AliShuttleStatus(AliShuttleStatus::kStarted);
1141                 return WriteShuttleStatus(status);
1142         }
1143
1144         // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
1145         // If it happens it may mean Logbook updating failed... let's do it now!
1146         if (status->GetStatus() == AliShuttleStatus::kDone ||
1147             status->GetStatus() == AliShuttleStatus::kFailed){
1148                 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
1149                                         fCurrentDetector.Data(),
1150                                         status->GetStatusName(status->GetStatus())));
1151                 UpdateShuttleLogbook(fCurrentDetector.Data(),
1152                                         status->GetStatusName(status->GetStatus()));
1153                 return kFALSE;
1154         }
1155
1156         if (status->GetStatus() == AliShuttleStatus::kStoreError) {
1157                 Log("SHUTTLE",
1158                         Form("ContinueProcessing - %s: Grid storage of one or more objects failed. Trying again now",
1159                                 fCurrentDetector.Data()));
1160                 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
1161                 if (StoreOCDB()){
1162                         Log("SHUTTLE", Form("ContinueProcessing - %s: all objects successfully stored into main storage",
1163                                 fCurrentDetector.Data()));
1164                         UpdateShuttleStatus(AliShuttleStatus::kDone);
1165                         UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE");
1166                 } else {
1167                         Log("SHUTTLE",
1168                                 Form("ContinueProcessing - %s: Grid storage failed again",
1169                                         fCurrentDetector.Data()));
1170                         UpdateShuttleStatus(AliShuttleStatus::kStoreError);
1171                 }
1172                 return kFALSE;
1173         }
1174
1175         // if we get here, there is a restart
1176         Bool_t cont = kFALSE;
1177
1178         // abort conditions
1179         if (status->GetCount() >= fConfig->GetMaxRetries()) {
1180                 Log("SHUTTLE", Form("ContinueProcessing - %s failed %d times in status %s - "
1181                                 "Updating Shuttle Logbook", fCurrentDetector.Data(),
1182                                 status->GetCount(), status->GetStatusName()));
1183                 UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
1184                 UpdateShuttleStatus(AliShuttleStatus::kFailed);
1185
1186                 // there may still be objects in local OCDB and reference storage
1187                 // and FXS databases may be not updated: do it now!
1188                 
1189                 // TODO Currently disabled, we want to keep files in case of failure!
1190                 // CleanLocalStorage(fgkLocalCDB);
1191                 // CleanLocalStorage(fgkLocalRefStorage);
1192                 // UpdateTableFailCase();
1193                 
1194                 // Send mail to detector expert!
1195                 AliInfo(Form("Sending mail to %s expert...", fCurrentDetector.Data()));
1196                 if (!SendMail())
1197                         Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert",
1198                                         fCurrentDetector.Data()));
1199
1200         } else {
1201                 Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. "
1202                                 "Aborted before with %s. Retry number %d.", fCurrentDetector.Data(),
1203                                 status->GetStatusName(), status->GetCount()));
1204                 Bool_t increaseCount = kTRUE;
1205                 if (status->GetStatus() == AliShuttleStatus::kDCSError || status->GetStatus() == AliShuttleStatus::kDCSStarted)
1206                         increaseCount = kFALSE;
1207                 UpdateShuttleStatus(AliShuttleStatus::kStarted, increaseCount);
1208                 cont = kTRUE;
1209         }
1210
1211         return cont;
1212 }
1213
1214 //______________________________________________________________________________________________
1215 Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
1216 {
1217         //
1218         // Makes data retrieval for all detectors in the configuration.
1219         // entry: Shuttle logbook entry, contains run paramenters and status of detectors
1220         // (Unprocessed, Inactive, Failed or Done).
1221         // Returns kFALSE in case of error occured and kTRUE otherwise
1222         //
1223
1224         if (!entry) return kFALSE;
1225
1226         fLogbookEntry = entry;
1227
1228         AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^* \n",
1229                                         GetCurrentRun()));
1230
1231         // create ML instance that monitors this run
1232         fMonaLisa = new TMonaLisaWriter(Form("%d", GetCurrentRun()), "SHUTTLE", "aliendb1.cern.ch");
1233         // disable monitoring of other parameters that come e.g. from TFile
1234         gMonitoringWriter = 0;
1235
1236         // Send the information to ML
1237         TMonaLisaText  mlStatus("SHUTTLE_status", "Processing");
1238         TMonaLisaText  mlRunType("SHUTTLE_runtype", Form("%s (%s)", entry->GetRunType(), entry->GetRunParameter("log")));
1239
1240         TList mlList;
1241         mlList.Add(&mlStatus);
1242         mlList.Add(&mlRunType);
1243
1244         fMonaLisa->SendParameters(&mlList);
1245
1246         if (fLogbookEntry->IsDone())
1247         {
1248                 Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook");
1249                 UpdateShuttleLogbook("shuttle_done");
1250                 fLogbookEntry = 0;
1251                 return kTRUE;
1252         }
1253
1254         // read test mode if flag is set
1255         if (fReadTestMode)
1256         {
1257                 fTestMode = kNone;
1258                 TString logEntry(entry->GetRunParameter("log"));
1259                 //printf("log entry = %s\n", logEntry.Data());
1260                 TString searchStr("Testmode: ");
1261                 Int_t pos = logEntry.Index(searchStr.Data());
1262                 //printf("%d\n", pos);
1263                 if (pos >= 0)
1264                 {
1265                         TSubString subStr = logEntry(pos + searchStr.Length(), logEntry.Length());
1266                         //printf("%s\n", subStr.String().Data());
1267                         TString newStr(subStr.Data());
1268                         TObjArray* token = newStr.Tokenize(' ');
1269                         if (token)
1270                         {
1271                                 //token->Print();
1272                                 TObjString* tmpStr = dynamic_cast<TObjString*> (token->First());
1273                                 if (tmpStr)
1274                                 {
1275                                         Int_t testMode = tmpStr->String().Atoi();
1276                                         if (testMode > 0)
1277                                         {
1278                                                 Log("SHUTTLE", Form("Enabling test mode %d", testMode));
1279                                                 SetTestMode((TestMode) testMode);
1280                                         }
1281                                 }
1282                                 delete token;          
1283                         }
1284                 }
1285         }
1286         
1287         Log("SHUTTLE", Form("The test mode flag is %d", (Int_t) fTestMode));
1288         
1289         fLogbookEntry->Print("all");
1290
1291         // Initialization
1292         Bool_t hasError = kFALSE;
1293
1294         AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
1295         if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
1296         AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
1297         if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
1298
1299         // Loop on detectors in the configuration
1300         TIter iter(fConfig->GetDetectors());
1301         TObjString* aDetector = 0;
1302
1303         while ((aDetector = (TObjString*) iter.Next()))
1304         {
1305                 fCurrentDetector = aDetector->String();
1306
1307                 if (ContinueProcessing() == kFALSE) continue;
1308
1309                 AliInfo(Form("\n\n \t\t\t****** run %d - %s: START  ******",
1310                                                 GetCurrentRun(), aDetector->GetName()));
1311
1312                 for(Int_t iSys=0;iSys<3;iSys++) fFXSCalled[iSys]=kFALSE;
1313
1314                 Log(fCurrentDetector.Data(), "Starting processing");
1315
1316                 Int_t pid = fork();
1317
1318                 if (pid < 0)
1319                 {
1320                         Log("SHUTTLE", "ERROR: Forking failed");
1321                 }
1322                 else if (pid > 0)
1323                 {
1324                         // parent
1325                         AliInfo(Form("In parent process of %d - %s: Starting monitoring",
1326                                                         GetCurrentRun(), aDetector->GetName()));
1327
1328                         Long_t begin = time(0);
1329
1330                         int status; // to be used with waitpid, on purpose an int (not Int_t)!
1331                         while (waitpid(pid, &status, WNOHANG) == 0)
1332                         {
1333                                 Long_t expiredTime = time(0) - begin;
1334
1335                                 if (expiredTime > fConfig->GetPPTimeOut())
1336                                 {
1337                                         TString tmp;
1338                                         tmp.Form("Process of %s time out. Run time: %d seconds. Killing...",
1339                                                                 fCurrentDetector.Data(), expiredTime);
1340                                         Log("SHUTTLE", tmp);
1341                                         Log(fCurrentDetector, tmp);
1342
1343                                         kill(pid, 9);
1344
1345                                         UpdateShuttleStatus(AliShuttleStatus::kPPTimeOut);
1346                                         hasError = kTRUE;
1347
1348                                         gSystem->Sleep(1000);
1349                                 }
1350                                 else
1351                                 {
1352                                         gSystem->Sleep(1000);
1353                                         
1354                                         TString checkStr;
1355                                         checkStr.Form("ps -o vsize --pid %d | tail -n 1", pid);
1356                                         FILE* pipe = gSystem->OpenPipe(checkStr, "r");
1357                                         if (!pipe)
1358                                         {
1359                                                 Log("SHUTTLE", Form("Error: Could not open pipe to %s", checkStr.Data()));
1360                                                 continue;
1361                                         }
1362                                                 
1363                                         char buffer[100];
1364                                         if (!fgets(buffer, 100, pipe))
1365                                         {
1366                                                 Log("SHUTTLE", "Error: ps did not return anything");
1367                                                 gSystem->ClosePipe(pipe);
1368                                                 continue;
1369                                         }
1370                                         gSystem->ClosePipe(pipe);
1371                                         
1372                                         //Log("SHUTTLE", Form("ps returned %s", buffer));
1373                                         
1374                                         Int_t mem = 0;
1375                                         if ((sscanf(buffer, "%d\n", &mem) != 1) || !mem)
1376                                         {
1377                                                 Log("SHUTTLE", "Error: Could not parse output of ps");
1378                                                 continue;
1379                                         }
1380                                         
1381                                         if (expiredTime % 60 == 0)
1382                                                 Log("SHUTTLE", Form("%s: Checking process. Run time: %d seconds - Memory consumption: %d KB",
1383                                                                 fCurrentDetector.Data(), expiredTime, mem));
1384                                         
1385                                         if (mem > fConfig->GetPPMaxMem())
1386                                         {
1387                                                 TString tmp;
1388                                                 tmp.Form("Process exceeds maximum allowed memory (%d KB > %d KB). Killing...",
1389                                                         mem, fConfig->GetPPMaxMem());
1390                                                 Log("SHUTTLE", tmp);
1391                                                 Log(fCurrentDetector, tmp);
1392         
1393                                                 kill(pid, 9);
1394         
1395                                                 UpdateShuttleStatus(AliShuttleStatus::kPPOutOfMemory);
1396                                                 hasError = kTRUE;
1397         
1398                                                 gSystem->Sleep(1000);
1399                                         }
1400                                 }
1401                         }
1402
1403                         AliInfo(Form("In parent process of %d - %s: Client has terminated.",
1404                                                                 GetCurrentRun(), aDetector->GetName()));
1405
1406                         if (WIFEXITED(status))
1407                         {
1408                                 Int_t returnCode = WEXITSTATUS(status);
1409
1410                                 Log("SHUTTLE", Form("%s: the return code is %d", fCurrentDetector.Data(),
1411                                                                                 returnCode));
1412
1413                                 if (returnCode == 0) hasError = kTRUE;
1414                         }
1415                 }
1416                 else if (pid == 0)
1417                 {
1418                         // client
1419                         AliInfo(Form("In client process of %d - %s", GetCurrentRun(), aDetector->GetName()));
1420
1421                         AliInfo("Redirecting output...");
1422
1423                         if ((freopen(GetLogFileName(fCurrentDetector), "a", stdout)) == 0)
1424                         {
1425                                 Log("SHUTTLE", "Could not freopen stdout");
1426                         }
1427                         else
1428                         {
1429                                 fOutputRedirected = kTRUE;
1430                                 if ((dup2(fileno(stdout), fileno(stderr))) < 0)
1431                                         Log("SHUTTLE", "Could not redirect stderr");
1432                                 
1433                         }
1434                         
1435                         TString wd = gSystem->WorkingDirectory();
1436                         TString tmpDir = Form("%s/%s_process",GetShuttleTempDir(),fCurrentDetector.Data());
1437                         
1438                         gSystem->mkdir(tmpDir.Data());
1439                         gSystem->ChangeDirectory(tmpDir.Data());
1440                         
1441                         Bool_t success = ProcessCurrentDetector();
1442                         
1443                         gSystem->ChangeDirectory(wd.Data());
1444                         
1445                         gSystem->Exec(Form("rm -rf %s",tmpDir.Data()));
1446                         
1447                         if (success) // Preprocessor finished successfully!
1448                         { 
1449                                 // Update time_processed field in FXS DB
1450                                 if (UpdateTable() == kFALSE)
1451                                         Log("SHUTTLE", Form("Process - %s: Could not update FXS databases!", 
1452                                                         fCurrentDetector.Data()));
1453
1454                                 // Transfer the data from local storage to main storage (Grid)
1455                                 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
1456                                 if (StoreOCDB() == kFALSE)
1457                                 {
1458                                         AliInfo(Form("\n \t\t\t****** run %d - %s: STORAGE ERROR ****** \n\n",
1459                                                         GetCurrentRun(), aDetector->GetName()));
1460                                         UpdateShuttleStatus(AliShuttleStatus::kStoreError);
1461                                         success = kFALSE;
1462                                 } else {
1463                                         AliInfo(Form("\n \t\t\t****** run %d - %s: DONE ****** \n\n",
1464                                                         GetCurrentRun(), aDetector->GetName()));
1465                                         UpdateShuttleStatus(AliShuttleStatus::kDone);
1466                                         UpdateShuttleLogbook(fCurrentDetector, "DONE");
1467                                 }
1468                         }
1469
1470                         for (UInt_t iSys=0; iSys<3; iSys++)
1471                         {
1472                                 if (fFXSCalled[iSys]) fFXSlist[iSys].Clear();
1473                         }
1474
1475                         AliInfo(Form("Client process of %d - %s is exiting now with %d.",
1476                                                         GetCurrentRun(), aDetector->GetName(), success));
1477
1478                         // the client exits here
1479                         gSystem->Exit(success);
1480
1481                         AliError("We should never get here!!!");
1482                 }
1483         }
1484
1485         AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^* \n",
1486                                                         GetCurrentRun()));
1487
1488         //check if shuttle is done for this run, if so update logbook
1489         TObjArray checkEntryArray;
1490         checkEntryArray.SetOwner(1);
1491         TString whereClause = Form("where run=%d", GetCurrentRun());
1492         if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || checkEntryArray.GetEntries() == 0) {
1493                 Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!",
1494                                                 GetCurrentRun()));
1495                 return hasError == kFALSE;
1496         }
1497
1498         AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*>
1499                                                 (checkEntryArray.At(0));
1500
1501         if (checkEntry)
1502         {
1503                 if (checkEntry->IsDone())
1504                 {
1505                         Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook");
1506                         UpdateShuttleLogbook("shuttle_done");
1507                 }
1508                 else
1509                 {
1510                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1511                         {
1512                                 if (checkEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
1513                                 {
1514                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
1515                                                         checkEntry->GetRun(), GetDetName(iDet)));
1516                                         fFirstUnprocessed[iDet] = kFALSE;
1517                                 }
1518                         }
1519                 }
1520         }
1521
1522         // remove ML instance
1523         delete fMonaLisa;
1524         fMonaLisa = 0;
1525
1526         fLogbookEntry = 0;
1527
1528         return hasError == kFALSE;
1529 }
1530
1531 //______________________________________________________________________________________________
1532 Bool_t AliShuttle::ProcessCurrentDetector()
1533 {
1534         //
1535         // Makes data retrieval just for a specific detector (fCurrentDetector).
1536         // Threre should be a configuration for this detector.
1537
1538         Log("SHUTTLE", Form("ProcessCurrentDetector - Retrieving values for %s, run %d", 
1539                                                 fCurrentDetector.Data(), GetCurrentRun()));
1540
1541         if (!CleanReferenceStorage(fCurrentDetector.Data()))
1542                 return kFALSE;
1543
1544         TMap* dcsMap = new TMap();
1545
1546         // call preprocessor
1547         AliPreprocessor* aPreprocessor =
1548                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
1549
1550         aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
1551
1552         Bool_t processDCS = aPreprocessor->ProcessDCS();
1553
1554         if (!processDCS)
1555         {
1556                 Log(fCurrentDetector, "ProcessCurrentDetector -"
1557                         " The preprocessor requested to skip the retrieval of DCS values");
1558         }
1559         else if (fTestMode & kSkipDCS)
1560         {
1561                 Log(fCurrentDetector, "ProcessCurrentDetector - In TESTMODE: Skipping DCS processing");
1562         } 
1563         else if (fTestMode & kErrorDCS)
1564         {
1565                 Log(fCurrentDetector, "ProcessCurrentDetector - In TESTMODE: Simulating DCS error");
1566                 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
1567                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1568                 delete dcsMap;
1569                 return kFALSE;
1570         } else {
1571
1572                 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
1573
1574                 // Query DCS archive
1575                 Int_t nServers = fConfig->GetNServers(fCurrentDetector);
1576                 Log("SHUTTLE", Form("ProcessCurrentDetector -"
1577                                 " found %d Amanda servers for %s", nServers, fCurrentDetector.Data()));
1578                 
1579                 for (int iServ=0; iServ<nServers; iServ++)
1580                 {
1581                 
1582                         TString host(fConfig->GetDCSHost(fCurrentDetector, iServ));
1583                         Int_t port = fConfig->GetDCSPort(fCurrentDetector, iServ);
1584                         
1585                         TMap* aliasMap = 0;
1586                         TMap* dpMap = 0;
1587         
1588                         if (fConfig->GetDCSAliases(fCurrentDetector, iServ)->GetEntries() > 0)
1589                         {
1590                                 aliasMap = GetValueSet(host, port, 
1591                                                 fConfig->GetDCSAliases(fCurrentDetector, iServ), kAlias);
1592                                 if (!aliasMap)
1593                                 {
1594                                         Log(fCurrentDetector, 
1595                                                 Form("ProcessCurrentDetector -"
1596                                                         " Error retrieving DCS aliases from server %s", 
1597                                                                 host.Data()));
1598                                         UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1599                                         delete dcsMap;
1600                                         return kFALSE;
1601                                 }
1602                         }
1603                         
1604                         if (fConfig->GetDCSDataPoints(fCurrentDetector, iServ)->GetEntries() > 0)
1605                         {
1606                                 dpMap = GetValueSet(host, port, 
1607                                                 fConfig->GetDCSDataPoints(fCurrentDetector, iServ), kDP);
1608                                 if (!dpMap)
1609                                 {
1610                                         Log(fCurrentDetector, 
1611                                                 Form("ProcessCurrentDetector -"
1612                                                         " Error retrieving DCS data points from server %s", 
1613                                                                 host.Data()));
1614                                         UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1615                                         if (aliasMap) delete aliasMap;
1616                                         delete dcsMap;
1617                                         return kFALSE;
1618                                 }                               
1619                         }
1620                         
1621                         // merge aliasMap and dpMap into dcsMap
1622                         if(aliasMap) {
1623                                 TIter iter(aliasMap);
1624                                 TObjString* key = 0;
1625                                 while ((key = (TObjString*) iter.Next()))
1626                                         dcsMap->Add(key, aliasMap->GetValue(key->String()));
1627                                 
1628                                 aliasMap->SetOwner(kFALSE);
1629                                 delete aliasMap;
1630                         }       
1631                         
1632                         if(dpMap) {
1633                                 TIter iter(dpMap);
1634                                 TObjString* key = 0;
1635                                 while ((key = (TObjString*) iter.Next()))
1636                                         dcsMap->Add(key, dpMap->GetValue(key->String()));
1637                                 
1638                                 dpMap->SetOwner(kFALSE);
1639                                 delete dpMap;
1640                         }
1641                 }
1642         }
1643         
1644         // DCS Archive DB processing successful. Call Preprocessor!
1645         UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
1646
1647         UInt_t returnValue = aPreprocessor->Process(dcsMap);
1648
1649         if (returnValue > 0) // Preprocessor error!
1650         {
1651                 Log(fCurrentDetector, Form("Preprocessor failed. Process returned %d.", returnValue));
1652                 UpdateShuttleStatus(AliShuttleStatus::kPPError);
1653                 dcsMap->DeleteAll();
1654                 delete dcsMap;
1655                 return kFALSE;
1656         }
1657         
1658         // preprocessor ok!
1659         UpdateShuttleStatus(AliShuttleStatus::kPPDone);
1660         Log(fCurrentDetector, Form("ProcessCurrentDetector - %s preprocessor returned success",
1661                                 fCurrentDetector.Data()));
1662
1663         dcsMap->DeleteAll();
1664         delete dcsMap;
1665
1666         return kTRUE;
1667 }
1668
1669 //______________________________________________________________________________________________
1670 Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
1671                 TObjArray& entries)
1672 {
1673         // Query DAQ's Shuttle logbook and fills detector status object.
1674         // Call QueryRunParameters to query DAQ logbook for run parameters.
1675         //
1676
1677         entries.SetOwner(1);
1678
1679         // check connection, in case connect
1680         if(!Connect(3)) return kFALSE;
1681
1682         TString sqlQuery;
1683         sqlQuery = Form("select * from %s %s order by run", fConfig->GetShuttlelbTable(), whereClause);
1684
1685         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
1686         if (!aResult) {
1687                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
1688                 return kFALSE;
1689         }
1690
1691         AliDebug(2,Form("Query = %s", sqlQuery.Data()));
1692
1693         if(aResult->GetRowCount() == 0) {
1694                 AliInfo("No entries in Shuttle Logbook match request");
1695                 delete aResult;
1696                 return kTRUE;
1697         }
1698
1699         // TODO Check field count!
1700         const UInt_t nCols = 23;
1701         if (aResult->GetFieldCount() != (Int_t) nCols) {
1702                 AliError("Invalid SQL result field number!");
1703                 delete aResult;
1704                 return kFALSE;
1705         }
1706
1707         TSQLRow* aRow;
1708         while ((aRow = aResult->Next())) {
1709                 TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
1710                 Int_t run = runString.Atoi();
1711
1712                 AliShuttleLogbookEntry *entry = QueryRunParameters(run);
1713                 if (!entry)
1714                         continue;
1715
1716                 // loop on detectors
1717                 for(UInt_t ii = 0; ii < nCols; ii++)
1718                         entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii));
1719
1720                 entries.AddLast(entry);
1721                 delete aRow;
1722         }
1723
1724         delete aResult;
1725         return kTRUE;
1726 }
1727
1728 //______________________________________________________________________________________________
1729 AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
1730 {
1731         //
1732         // Retrieve run parameters written in the DAQ logbook and sets them into AliShuttleLogbookEntry object
1733         //
1734
1735         // check connection, in case connect
1736         if (!Connect(3))
1737                 return 0;
1738
1739         TString sqlQuery;
1740         sqlQuery.Form("select * from %s where run=%d", fConfig->GetDAQlbTable(), run);
1741
1742         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
1743         if (!aResult) {
1744                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
1745                 return 0;
1746         }
1747
1748         if (aResult->GetRowCount() == 0) {
1749                 Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run));
1750                 delete aResult;
1751                 return 0;
1752         }
1753
1754         if (aResult->GetRowCount() > 1) {
1755                 AliError(Form("More than one entry in DAQ Logbook for run %d. Skipping", run));
1756                 delete aResult;
1757                 return 0;
1758         }
1759
1760         TSQLRow* aRow = aResult->Next();
1761         if (!aRow)
1762         {
1763                 AliError(Form("Could not retrieve row for run %d. Skipping", run));
1764                 delete aResult;
1765                 return 0;
1766         }
1767
1768         AliShuttleLogbookEntry* entry = new AliShuttleLogbookEntry(run);
1769
1770         for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
1771                 entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
1772
1773         UInt_t startTime = entry->GetStartTime();
1774         UInt_t endTime = entry->GetEndTime();
1775
1776         if (!startTime || !endTime || startTime > endTime) {
1777                 Log("SHUTTLE",
1778                         Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d",
1779                                 run, startTime, endTime));
1780                 delete entry;
1781                 delete aRow;
1782                 delete aResult;
1783                 return 0;
1784         }
1785
1786         delete aRow;
1787         delete aResult;
1788
1789         return entry;
1790 }
1791
1792 //______________________________________________________________________________________________
1793 Bool_t AliShuttle::GetValueSet(const char* host, Int_t port, const char* entry,
1794                                 TObjArray* valueSet, DCSType type)
1795 {
1796         // Retrieve all "entry" data points from the DCS server
1797         // host, port: TSocket connection parameters
1798         // entry: name of the alias or data point
1799         // valueSet: array of retrieved AliDCSValue's
1800         // type: kAlias or kDP
1801
1802         // TODO The last parameter switches from single query to multy query!
1803         AliDCSClient client(host, port, fTimeout, fRetries, 1);
1804         if (!client.IsConnected())
1805         {
1806                 return kFALSE;
1807         }
1808
1809         Int_t result=0;
1810
1811         if (type == kAlias)
1812         {
1813                 result = client.GetAliasValues(entry,
1814                         GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1815         } else
1816         if (type == kDP)
1817         {
1818                 result = client.GetDPValues(entry,
1819                         GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1820         }
1821
1822         if (result < 0)
1823         {
1824                 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get '%s'! Reason: %s",
1825                         entry, AliDCSClient::GetErrorString(result)));
1826
1827                 if (result == AliDCSClient::fgkServerError)
1828                 {
1829                         Log(fCurrentDetector.Data(), Form("GetValueSet - Server error: %s",
1830                                 client.GetServerError().Data()));
1831                 }
1832
1833                 return kFALSE;
1834         }
1835
1836         return kTRUE;
1837 }
1838
1839 //______________________________________________________________________________________________
1840 TMap* AliShuttle::GetValueSet(const char* host, Int_t port, const TSeqCollection* entries,
1841                               DCSType type)
1842 {
1843         // Retrieve all "entry" data points from the DCS server
1844         // host, port: TSocket connection parameters
1845         // entries: list of name of the alias or data point
1846         // type: kAlias or kDP
1847         // returns TMap of values, 0 when failure
1848
1849         AliDCSClient client(host, port, fTimeout, fRetries);
1850
1851         TMap* result = 0;
1852         if (type == kAlias)
1853         {
1854                 result = client.GetAliasValues(entries, GetCurrentStartTime(), 
1855                         GetCurrentEndTime());
1856         } 
1857         else if (type == kDP)
1858         {
1859                 result = client.GetDPValues(entries, GetCurrentStartTime(), 
1860                         GetCurrentEndTime());
1861         }
1862
1863         if (result == 0)
1864         {
1865                 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get entries! Reason: %s",
1866                         client.GetServerError().Data()));
1867
1868                 return 0;
1869         }
1870                 
1871         return result;
1872 }
1873
1874 //______________________________________________________________________________________________
1875 const char* AliShuttle::GetFile(Int_t system, const char* detector,
1876                 const char* id, const char* source)
1877 {
1878         // Get calibration file from file exchange servers
1879         // First queris the FXS database for the file name, using the run, detector, id and source info
1880         // then calls RetrieveFile(filename) for actual copy to local disk
1881         // run: current run being processed (given by Logbook entry fLogbookEntry)
1882         // detector: the Preprocessor name
1883         // id: provided as a parameter by the Preprocessor
1884         // source: provided by the Preprocessor through GetFileSources function
1885
1886         // check if test mode should simulate a FXS error
1887         if (fTestMode & kErrorFXSFiles)
1888         {
1889                 Log(detector, Form("GetFile - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
1890                 return 0;
1891         }
1892         
1893         // check connection, in case connect
1894         if (!Connect(system))
1895         {
1896                 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
1897                 return 0;
1898         }
1899
1900         // Query preparation
1901         TString sourceName(source);
1902         Int_t nFields = 3;
1903         TString sqlQueryStart = Form("select filePath,size,fileChecksum from %s where",
1904                                                                 fConfig->GetFXSdbTable(system));
1905         TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1906                                                                 GetCurrentRun(), detector, id);
1907
1908         if (system == kDAQ)
1909         {
1910                 whereClause += Form(" and DAQsource=\"%s\"", source);
1911         }
1912         else if (system == kDCS)
1913         {
1914                 sourceName="none";
1915         }
1916         else if (system == kHLT)
1917         {
1918                 whereClause += Form(" and DDLnumbers=\"%s\"", source);
1919                 nFields = 3;
1920         }
1921
1922         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1923
1924         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1925
1926         // Query execution
1927         TSQLResult* aResult = 0;
1928         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
1929         if (!aResult) {
1930                 Log(detector, Form("GetFileName - Can't execute SQL query to %s database for: id = %s, source = %s",
1931                                 GetSystemName(system), id, sourceName.Data()));
1932                 return 0;
1933         }
1934
1935         if(aResult->GetRowCount() == 0)
1936         {
1937                 Log(detector,
1938                         Form("GetFileName - No entry in %s FXS db for: id = %s, source = %s",
1939                                 GetSystemName(system), id, sourceName.Data()));
1940                 delete aResult;
1941                 return 0;
1942         }
1943
1944         if (aResult->GetRowCount() > 1) {
1945                 Log(detector,
1946                         Form("GetFileName - More than one entry in %s FXS db for: id = %s, source = %s",
1947                                 GetSystemName(system), id, sourceName.Data()));
1948                 delete aResult;
1949                 return 0;
1950         }
1951
1952         if (aResult->GetFieldCount() != nFields) {
1953                 Log(detector,
1954                         Form("GetFileName - Wrong field count in %s FXS db for: id = %s, source = %s",
1955                                 GetSystemName(system), id, sourceName.Data()));
1956                 delete aResult;
1957                 return 0;
1958         }
1959
1960         TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
1961
1962         if (!aRow){
1963                 Log(detector, Form("GetFileName - Empty set result in %s FXS db from query: id = %s, source = %s",
1964                                 GetSystemName(system), id, sourceName.Data()));
1965                 delete aResult;
1966                 return 0;
1967         }
1968
1969         TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
1970         TString fileSize(aRow->GetField(1), aRow->GetFieldLength(1));
1971         TString fileChecksum(aRow->GetField(2), aRow->GetFieldLength(2));
1972
1973         delete aResult;
1974         delete aRow;
1975
1976         AliDebug(2, Form("filePath = %s; size = %s, fileChecksum = %s",
1977                                 filePath.Data(), fileSize.Data(), fileChecksum.Data()));
1978
1979         // retrieved file is renamed to make it unique
1980         TString localFileName = Form("%s_%s_%d_%s_%s.shuttle",
1981                                         GetSystemName(system), detector, GetCurrentRun(), id, sourceName.Data());
1982
1983
1984         // file retrieval from FXS
1985         UInt_t nRetries = 0;
1986         UInt_t maxRetries = 3;
1987         Bool_t result = kFALSE;
1988
1989         // copy!! if successful TSystem::Exec returns 0
1990         while(nRetries++ < maxRetries) {
1991                 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
1992                 result = RetrieveFile(system, filePath.Data(), localFileName.Data());
1993                 if(!result)
1994                 {
1995                         Log(detector, Form("GetFileName - Copy of file %s from %s FXS failed",
1996                                         filePath.Data(), GetSystemName(system)));
1997                         continue;
1998                 } 
1999
2000                 if (fileChecksum.Length()>0)
2001                 {
2002                         // compare md5sum of local file with the one stored in the FXS DB
2003                         Int_t md5Comp = gSystem->Exec(Form("md5sum %s/%s |grep %s 2>&1 > /dev/null",
2004                                                 GetShuttleTempDir(), localFileName.Data(), fileChecksum.Data()));
2005
2006                         if (md5Comp != 0)
2007                         {
2008                                 Log(detector, Form("GetFileName - md5sum of file %s does not match with local copy!",
2009                                                         filePath.Data()));
2010                                 result = kFALSE;
2011                                 continue;
2012                         }
2013                 } else {
2014                         Log(fCurrentDetector, Form("GetFile - md5sum of file %s not set in %s database, skipping comparison",
2015                                                         filePath.Data(), GetSystemName(system)));
2016                 }
2017                 if (result) break;
2018         }
2019
2020         if(!result) return 0;
2021
2022         fFXSCalled[system]=kTRUE;
2023         TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, sourceName.Data()));
2024         fFXSlist[system].Add(fileParams);
2025
2026         static TString fullLocalFileName;
2027         fullLocalFileName.Form("%s/%s", GetShuttleTempDir(), localFileName.Data());
2028
2029         Log(fCurrentDetector, Form("GetFile - Retrieved file with id %s and source %s from %s to %s", id, source, GetSystemName(system), fullLocalFileName.Data()));
2030
2031         return fullLocalFileName.Data();
2032 }
2033
2034 //______________________________________________________________________________________________
2035 Bool_t AliShuttle::RetrieveFile(UInt_t system, const char* fxsFileName, const char* localFileName)
2036 {
2037         //
2038         // Copies file from FXS to local Shuttle machine
2039         //
2040
2041         // check temp directory: trying to cd to temp; if it does not exist, create it
2042         AliDebug(2, Form("Copy file %s from %s FXS into %s/%s",
2043                         GetSystemName(system), fxsFileName, GetShuttleTempDir(), localFileName));
2044
2045         void* dir = gSystem->OpenDirectory(GetShuttleTempDir());
2046         if (dir == NULL) {
2047                 if (gSystem->mkdir(GetShuttleTempDir(), kTRUE)) {
2048                         AliError(Form("Can't open directory <%s>", GetShuttleTempDir()));
2049                         return kFALSE;
2050                 }
2051
2052         } else {
2053                 gSystem->FreeDirectory(dir);
2054         }
2055
2056         TString baseFXSFolder;
2057         if (system == kDAQ)
2058         {
2059                 baseFXSFolder = "FES/";
2060         }
2061         else if (system == kDCS)
2062         {
2063                 baseFXSFolder = "";
2064         }
2065         else if (system == kHLT)
2066         {
2067                 baseFXSFolder = "/opt/FXS/";
2068         }
2069
2070
2071         TString command = Form("scp -oPort=%d -2 %s@%s:%s%s %s/%s",
2072                 fConfig->GetFXSPort(system),
2073                 fConfig->GetFXSUser(system),
2074                 fConfig->GetFXSHost(system),
2075                 baseFXSFolder.Data(),
2076                 fxsFileName,
2077                 GetShuttleTempDir(),
2078                 localFileName);
2079
2080         AliDebug(2, Form("%s",command.Data()));
2081
2082         Bool_t result = (gSystem->Exec(command.Data()) == 0);
2083
2084         return result;
2085 }
2086
2087 //______________________________________________________________________________________________
2088 TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
2089 {
2090         //
2091         // Get sources producing the condition file Id from file exchange servers
2092         // if id is NULL all sources are returned (distinct)
2093         //
2094
2095         Log(detector, Form("GetFileSources - Retrieving sources with id %s from %s", id, GetSystemName(system)));
2096         
2097         // check if test mode should simulate a FXS error
2098         if (fTestMode & kErrorFXSSources)
2099         {
2100                 Log(detector, Form("GetFileSources - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2101                 return 0;
2102         }
2103
2104         if (system == kDCS)
2105         {
2106                 AliWarning("DCS system has only one source of data!");
2107                 TList *list = new TList();
2108                 list->SetOwner(1);
2109                 list->Add(new TObjString(" "));
2110                 return list;
2111         }
2112
2113         // check connection, in case connect
2114         if (!Connect(system))
2115         {
2116                 Log(detector, Form("GetFileSources - Couldn't connect to %s FXS database", GetSystemName(system)));
2117                 return NULL;
2118         }
2119
2120         TString sourceName = 0;
2121         if (system == kDAQ)
2122         {
2123                 sourceName = "DAQsource";
2124         } else if (system == kHLT)
2125         {
2126                 sourceName = "DDLnumbers";
2127         }
2128
2129         TString sqlQueryStart = Form("select distinct %s from %s where", sourceName.Data(), fConfig->GetFXSdbTable(system));
2130         TString whereClause = Form("run=%d and detector=\"%s\"",
2131                                 GetCurrentRun(), detector);
2132         if (id)
2133                 whereClause += Form(" and fileId=\"%s\"", id);
2134         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2135
2136         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2137
2138         // Query execution
2139         TSQLResult* aResult;
2140         aResult = fServer[system]->Query(sqlQuery);
2141         if (!aResult) {
2142                 Log(detector, Form("GetFileSources - Can't execute SQL query to %s database for id: %s",
2143                                 GetSystemName(system), id));
2144                 return 0;
2145         }
2146
2147         TList *list = new TList();
2148         list->SetOwner(1);
2149         
2150         if (aResult->GetRowCount() == 0)
2151         {
2152                 Log(detector,
2153                         Form("GetFileSources - No entry in %s FXS table for id: %s", GetSystemName(system), id));
2154                 delete aResult;
2155                 return list;
2156         }
2157
2158         Log(detector, Form("GetFileSources - Found %d sources", aResult->GetRowCount()));
2159
2160         TSQLRow* aRow;
2161         while ((aRow = aResult->Next()))
2162         {
2163
2164                 TString source(aRow->GetField(0), aRow->GetFieldLength(0));
2165                 AliDebug(2, Form("%s = %s", sourceName.Data(), source.Data()));
2166                 list->Add(new TObjString(source));
2167                 delete aRow;
2168         }
2169
2170         delete aResult;
2171
2172         return list;
2173 }
2174
2175 //______________________________________________________________________________________________
2176 TList* AliShuttle::GetFileIDs(Int_t system, const char* detector, const char* source)
2177 {
2178         //
2179         // Get all ids of condition files produced by a given source from file exchange servers
2180         //
2181         
2182         Log(detector, Form("GetFileIDs - Retrieving ids with source %s with %s", source, GetSystemName(system)));
2183
2184         // check if test mode should simulate a FXS error
2185         if (fTestMode & kErrorFXSSources)
2186         {
2187                 Log(detector, Form("GetFileIDs - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2188                 return 0;
2189         }
2190
2191         // check connection, in case connect
2192         if (!Connect(system))
2193         {
2194                 Log(detector, Form("GetFileIDs - Couldn't connect to %s FXS database", GetSystemName(system)));
2195                 return NULL;
2196         }
2197
2198         TString sourceName = 0;
2199         if (system == kDAQ)
2200         {
2201                 sourceName = "DAQsource";
2202         } else if (system == kHLT)
2203         {
2204                 sourceName = "DDLnumbers";
2205         }
2206
2207         TString sqlQueryStart = Form("select fileId from %s where", fConfig->GetFXSdbTable(system));
2208         TString whereClause = Form("run=%d and detector=\"%s\"",
2209                                 GetCurrentRun(), detector);
2210         if (sourceName.Length() > 0 && source)
2211                 whereClause += Form(" and %s=\"%s\"", sourceName.Data(), source);
2212         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2213
2214         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2215
2216         // Query execution
2217         TSQLResult* aResult;
2218         aResult = fServer[system]->Query(sqlQuery);
2219         if (!aResult) {
2220                 Log(detector, Form("GetFileIDs - Can't execute SQL query to %s database for source: %s",
2221                                 GetSystemName(system), source));
2222                 return 0;
2223         }
2224
2225         TList *list = new TList();
2226         list->SetOwner(1);
2227         
2228         if (aResult->GetRowCount() == 0)
2229         {
2230                 Log(detector,
2231                         Form("GetFileIDs - No entry in %s FXS table for source: %s", GetSystemName(system), source));
2232                 delete aResult;
2233                 return list;
2234         }
2235
2236         Log(detector, Form("GetFileIDs - Found %d ids", aResult->GetRowCount()));
2237
2238         TSQLRow* aRow;
2239
2240         while ((aRow = aResult->Next()))
2241         {
2242
2243                 TString id(aRow->GetField(0), aRow->GetFieldLength(0));
2244                 AliDebug(2, Form("fileId = %s", id.Data()));
2245                 list->Add(new TObjString(id));
2246                 delete aRow;
2247         }
2248
2249         delete aResult;
2250
2251         return list;
2252 }
2253
2254 //______________________________________________________________________________________________
2255 Bool_t AliShuttle::Connect(Int_t system)
2256 {
2257         // Connect to MySQL Server of the system's FXS MySQL databases
2258         // DAQ Logbook, Shuttle Logbook and DAQ FXS db are on the same host
2259         //
2260
2261         // check connection: if already connected return
2262         if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
2263
2264         TString dbHost, dbUser, dbPass, dbName;
2265
2266         if (system < 3) // FXS db servers
2267         {
2268                 dbHost = Form("mysql://%s:%d", fConfig->GetFXSdbHost(system), fConfig->GetFXSdbPort(system));
2269                 dbUser = fConfig->GetFXSdbUser(system);
2270                 dbPass = fConfig->GetFXSdbPass(system);
2271                 dbName =   fConfig->GetFXSdbName(system);
2272         } else { // Run & Shuttle logbook servers
2273         // TODO Will the Shuttle logbook server be the same as the Run logbook server ???
2274                 dbHost = Form("mysql://%s:%d", fConfig->GetDAQlbHost(), fConfig->GetDAQlbPort());
2275                 dbUser = fConfig->GetDAQlbUser();
2276                 dbPass = fConfig->GetDAQlbPass();
2277                 dbName =   fConfig->GetDAQlbDB();
2278         }
2279
2280         fServer[system] = TSQLServer::Connect(dbHost.Data(), dbUser.Data(), dbPass.Data());
2281         if (!fServer[system] || !fServer[system]->IsConnected()) {
2282                 if(system < 3)
2283                 {
2284                 AliError(Form("Can't establish connection to FXS database for %s",
2285                                         AliShuttleInterface::GetSystemName(system)));
2286                 } else {
2287                 AliError("Can't establish connection to Run logbook.");
2288                 }
2289                 if(fServer[system]) delete fServer[system];
2290                 return kFALSE;
2291         }
2292
2293         // Get tables
2294         TSQLResult* aResult=0;
2295         switch(system){
2296                 case kDAQ:
2297                         aResult = fServer[kDAQ]->GetTables(dbName.Data());
2298                         break;
2299                 case kDCS:
2300                         aResult = fServer[kDCS]->GetTables(dbName.Data());
2301                         break;
2302                 case kHLT:
2303                         aResult = fServer[kHLT]->GetTables(dbName.Data());
2304                         break;
2305                 default:
2306                         aResult = fServer[3]->GetTables(dbName.Data());
2307                         break;
2308         }
2309
2310         delete aResult;
2311         return kTRUE;
2312 }
2313
2314 //______________________________________________________________________________________________
2315 Bool_t AliShuttle::UpdateTable()
2316 {
2317         //
2318         // Update FXS table filling time_processed field in all rows corresponding to current run and detector
2319         //
2320
2321         Bool_t result = kTRUE;
2322
2323         for (UInt_t system=0; system<3; system++)
2324         {
2325                 if(!fFXSCalled[system]) continue;
2326
2327                 // check connection, in case connect
2328                 if (!Connect(system))
2329                 {
2330                         Log(fCurrentDetector, Form("UpdateTable - Couldn't connect to %s FXS database", GetSystemName(system)));
2331                         result = kFALSE;
2332                         continue;
2333                 }
2334
2335                 TTimeStamp now; // now
2336
2337                 // Loop on FXS list entries
2338                 TIter iter(&fFXSlist[system]);
2339                 TObjString *aFXSentry=0;
2340                 while ((aFXSentry = dynamic_cast<TObjString*> (iter.Next())))
2341                 {
2342                         TString aFXSentrystr = aFXSentry->String();
2343                         TObjArray *aFXSarray = aFXSentrystr.Tokenize("#!?!#");
2344                         if (!aFXSarray || aFXSarray->GetEntries() != 2 )
2345                         {
2346                                 Log(fCurrentDetector, Form("UpdateTable - error updating %s FXS entry. Check string: <%s>",
2347                                         GetSystemName(system), aFXSentrystr.Data()));
2348                                 if(aFXSarray) delete aFXSarray;
2349                                 result = kFALSE;
2350                                 continue;
2351                         }
2352                         const char* fileId = ((TObjString*) aFXSarray->At(0))->GetName();
2353                         const char* source = ((TObjString*) aFXSarray->At(1))->GetName();
2354
2355                         TString whereClause;
2356                         if (system == kDAQ)
2357                         {
2358                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
2359                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
2360                         }
2361                         else if (system == kDCS)
2362                         {
2363                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\";",
2364                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId);
2365                         }
2366                         else if (system == kHLT)
2367                         {
2368                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\";",
2369                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
2370                         }
2371
2372                         delete aFXSarray;
2373
2374                         TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
2375                                                                 now.GetSec(), whereClause.Data());
2376
2377                         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2378
2379                         // Query execution
2380                         TSQLResult* aResult;
2381                         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2382                         if (!aResult)
2383                         {
2384                                 Log(fCurrentDetector, Form("UpdateTable - %s db: can't execute SQL query <%s>",
2385                                                                 GetSystemName(system), sqlQuery.Data()));
2386                                 result = kFALSE;
2387                                 continue;
2388                         }
2389                         delete aResult;
2390                 }
2391         }
2392
2393         return result;
2394 }
2395
2396 //______________________________________________________________________________________________
2397 Bool_t AliShuttle::UpdateTableFailCase()
2398 {
2399         // Update FXS table filling time_processed field in all rows corresponding to current run and detector
2400         // this is called in case the preprocessor is declared failed for the current run, because
2401         // the fields are updated only in case of success
2402
2403         Bool_t result = kTRUE;
2404
2405         for (UInt_t system=0; system<3; system++)
2406         {
2407                 // check connection, in case connect
2408                 if (!Connect(system))
2409                 {
2410                         Log(fCurrentDetector, Form("UpdateTableFailCase - Couldn't connect to %s FXS database",
2411                                                         GetSystemName(system)));
2412                         result = kFALSE;
2413                         continue;
2414                 }
2415
2416                 TTimeStamp now; // now
2417
2418                 // Loop on FXS list entries
2419
2420                 TString whereClause = Form("where run=%d and detector=\"%s\";",
2421                                                 GetCurrentRun(), fCurrentDetector.Data());
2422
2423
2424                 TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
2425                                                         now.GetSec(), whereClause.Data());
2426
2427                 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2428
2429                 // Query execution
2430                 TSQLResult* aResult;
2431                 aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2432                 if (!aResult)
2433                 {
2434                         Log(fCurrentDetector, Form("UpdateTableFailCase - %s db: can't execute SQL query <%s>",
2435                                                         GetSystemName(system), sqlQuery.Data()));
2436                         result = kFALSE;
2437                         continue;
2438                 }
2439                 delete aResult;
2440         }
2441
2442         return result;
2443 }
2444
2445 //______________________________________________________________________________________________
2446 Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
2447 {
2448         //
2449         // Update Shuttle logbook filling detector or shuttle_done column
2450         // ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
2451         //
2452
2453         // check connection, in case connect
2454         if(!Connect(3)){
2455                 Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook.");
2456                 return kFALSE;
2457         }
2458
2459         TString detName(detector);
2460         TString setClause;
2461         if(detName == "shuttle_done")
2462         {
2463                 setClause = "set shuttle_done=1";
2464
2465                 // Send the information to ML
2466                 TMonaLisaText  mlStatus("SHUTTLE_status", "Done");
2467
2468                 TList mlList;
2469                 mlList.Add(&mlStatus);
2470
2471                 fMonaLisa->SendParameters(&mlList);
2472         } else {
2473                 TString statusStr(status);
2474                 if(statusStr.Contains("done", TString::kIgnoreCase) ||
2475                    statusStr.Contains("failed", TString::kIgnoreCase)){
2476                         setClause = Form("set %s=\"%s\"", detector, status);
2477                 } else {
2478                         Log("SHUTTLE",
2479                                 Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s",
2480                                         status, detector));
2481                         return kFALSE;
2482                 }
2483         }
2484
2485         TString whereClause = Form("where run=%d", GetCurrentRun());
2486
2487         TString sqlQuery = Form("update %s %s %s",
2488                                         fConfig->GetShuttlelbTable(), setClause.Data(), whereClause.Data());
2489
2490         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2491
2492         // Query execution
2493         TSQLResult* aResult;
2494         aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
2495         if (!aResult) {
2496                 Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data()));
2497                 return kFALSE;
2498         }
2499         delete aResult;
2500
2501         return kTRUE;
2502 }
2503
2504 //______________________________________________________________________________________________
2505 Int_t AliShuttle::GetCurrentRun() const
2506 {
2507         //
2508         // Get current run from logbook entry
2509         //
2510
2511         return fLogbookEntry ? fLogbookEntry->GetRun() : -1;
2512 }
2513
2514 //______________________________________________________________________________________________
2515 UInt_t AliShuttle::GetCurrentStartTime() const
2516 {
2517         //
2518         // get current start time
2519         //
2520
2521         return fLogbookEntry ? fLogbookEntry->GetStartTime() : 0;
2522 }
2523
2524 //______________________________________________________________________________________________
2525 UInt_t AliShuttle::GetCurrentEndTime() const
2526 {
2527         //
2528         // get current end time from logbook entry
2529         //
2530
2531         return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
2532 }
2533
2534 //______________________________________________________________________________________________
2535 void AliShuttle::Log(const char* detector, const char* message)
2536 {
2537         //
2538         // Fill log string with a message
2539         //
2540
2541         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
2542         if (dir == NULL) {
2543                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE)) {
2544                         AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
2545                         return;
2546                 }
2547
2548         } else {
2549                 gSystem->FreeDirectory(dir);
2550         }
2551
2552         TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
2553         if (GetCurrentRun() >= 0) 
2554                 toLog += Form("run %d - ", GetCurrentRun());
2555         toLog += Form("%s", message);
2556
2557         AliInfo(toLog.Data());
2558         
2559         // if we redirect the log output already to the file, leave here
2560         if (fOutputRedirected && strcmp(detector, "SHUTTLE") != 0)
2561                 return;
2562
2563         TString fileName = GetLogFileName(detector);
2564         
2565         gSystem->ExpandPathName(fileName);
2566
2567         ofstream logFile;
2568         logFile.open(fileName, ofstream::out | ofstream::app);
2569
2570         if (!logFile.is_open()) {
2571                 AliError(Form("Could not open file %s", fileName.Data()));
2572                 return;
2573         }
2574
2575         logFile << toLog.Data() << "\n";
2576
2577         logFile.close();
2578 }
2579
2580 //______________________________________________________________________________________________
2581 TString AliShuttle::GetLogFileName(const char* detector) const
2582 {
2583         // 
2584         // returns the name of the log file for a given sub detector
2585         //
2586         
2587         TString fileName;
2588         
2589         if (GetCurrentRun() >= 0) 
2590                 fileName.Form("%s/%s_%d.log", GetShuttleLogDir(), detector, GetCurrentRun());
2591         else
2592                 fileName.Form("%s/%s.log", GetShuttleLogDir(), detector);
2593
2594         return fileName;
2595 }
2596
2597 //______________________________________________________________________________________________
2598 Bool_t AliShuttle::Collect(Int_t run)
2599 {
2600         //
2601         // Collects conditions data for all UNPROCESSED run written to DAQ LogBook in case of run = -1 (default)
2602         // If a dedicated run is given this run is processed
2603         //
2604         // In operational mode, this is the Shuttle function triggered by the EOR signal.
2605         //
2606
2607         if (run == -1)
2608                 Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs");
2609         else
2610                 Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run));
2611
2612         SetLastAction("Starting");
2613
2614         TString whereClause("where shuttle_done=0");
2615         if (run != -1)
2616                 whereClause += Form(" and run=%d", run);
2617
2618         TObjArray shuttleLogbookEntries;
2619         if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries))
2620         {
2621                 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
2622                 return kFALSE;
2623         }
2624
2625         if (shuttleLogbookEntries.GetEntries() == 0)
2626         {
2627                 if (run == -1)
2628                         Log("SHUTTLE","Collect - Found no UNPROCESSED runs in Shuttle logbook");
2629                 else
2630                         Log("SHUTTLE", Form("Collect - Run %d is already DONE "
2631                                                 "or it does not exist in Shuttle logbook", run));
2632                 return kTRUE;
2633         }
2634
2635         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
2636                 fFirstUnprocessed[iDet] = kTRUE;
2637
2638         if (run != -1)
2639         {
2640                 // query Shuttle logbook for earlier runs, check if some detectors are unprocessed,
2641                 // flag them into fFirstUnprocessed array
2642                 TString whereClause(Form("where shuttle_done=0 and run < %d", run));
2643                 TObjArray tmpLogbookEntries;
2644                 if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries))
2645                 {
2646                         Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
2647                         return kFALSE;
2648                 }
2649
2650                 TIter iter(&tmpLogbookEntries);
2651                 AliShuttleLogbookEntry* anEntry = 0;
2652                 while ((anEntry = dynamic_cast<AliShuttleLogbookEntry*> (iter.Next())))
2653                 {
2654                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
2655                         {
2656                                 if (anEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
2657                                 {
2658                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
2659                                                         anEntry->GetRun(), GetDetName(iDet)));
2660                                         fFirstUnprocessed[iDet] = kFALSE;
2661                                 }
2662                         }
2663
2664                 }
2665
2666         }
2667
2668         if (!RetrieveConditionsData(shuttleLogbookEntries))
2669         {
2670                 Log("SHUTTLE", "Collect - Process of at least one run failed");
2671                 return kFALSE;
2672         }
2673
2674         Log("SHUTTLE", "Collect - Requested run(s) successfully processed");
2675         return kTRUE;
2676 }
2677
2678 //______________________________________________________________________________________________
2679 Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries)
2680 {
2681         //
2682         // Retrieve conditions data for all runs that aren't processed yet
2683         //
2684
2685         Bool_t hasError = kFALSE;
2686
2687         TIter iter(&dateEntries);
2688         AliShuttleLogbookEntry* anEntry;
2689
2690         while ((anEntry = (AliShuttleLogbookEntry*) iter.Next())){
2691                 if (!Process(anEntry)){
2692                         hasError = kTRUE;
2693                 }
2694
2695                 // clean SHUTTLE temp directory
2696                 TString filename = Form("%s/*.shuttle", GetShuttleTempDir());
2697                 RemoveFile(filename.Data());
2698         }
2699
2700         return hasError == kFALSE;
2701 }
2702
2703 //______________________________________________________________________________________________
2704 ULong_t AliShuttle::GetTimeOfLastAction() const
2705 {
2706         //
2707         // Gets time of last action
2708         //
2709
2710         ULong_t tmp;
2711
2712         fMonitoringMutex->Lock();
2713
2714         tmp = fLastActionTime;
2715
2716         fMonitoringMutex->UnLock();
2717
2718         return tmp;
2719 }
2720
2721 //______________________________________________________________________________________________
2722 const TString AliShuttle::GetLastAction() const
2723 {
2724         //
2725         // returns a string description of the last action
2726         //
2727
2728         TString tmp;
2729
2730         fMonitoringMutex->Lock();
2731         
2732         tmp = fLastAction;
2733         
2734         fMonitoringMutex->UnLock();
2735
2736         return tmp;
2737 }
2738
2739 //______________________________________________________________________________________________
2740 void AliShuttle::SetLastAction(const char* action)
2741 {
2742         //
2743         // updates the monitoring variables
2744         //
2745
2746         fMonitoringMutex->Lock();
2747
2748         fLastAction = action;
2749         fLastActionTime = time(0);
2750         
2751         fMonitoringMutex->UnLock();
2752 }
2753
2754 //______________________________________________________________________________________________
2755 const char* AliShuttle::GetRunParameter(const char* param)
2756 {
2757         //
2758         // returns run parameter read from DAQ logbook
2759         //
2760
2761         if(!fLogbookEntry) {
2762                 AliError("No logbook entry!");
2763                 return 0;
2764         }
2765
2766         return fLogbookEntry->GetRunParameter(param);
2767 }
2768
2769 //______________________________________________________________________________________________
2770 AliCDBEntry* AliShuttle::GetFromOCDB(const char* detector, const AliCDBPath& path)
2771 {
2772         //
2773         // returns object from OCDB valid for current run
2774         //
2775
2776         if (fTestMode & kErrorOCDB)
2777         {
2778                 Log(detector, "GetFromOCDB - In TESTMODE - Simulating error with OCDB");
2779                 return 0;
2780         }
2781         
2782         AliCDBStorage *sto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
2783         if (!sto)
2784         {
2785                 Log(detector, "GetFromOCDB - Cannot activate main OCDB for query!");
2786                 return 0;
2787         }
2788
2789         return dynamic_cast<AliCDBEntry*> (sto->Get(path, GetCurrentRun()));
2790 }
2791
2792 //______________________________________________________________________________________________
2793 Bool_t AliShuttle::SendMail()
2794 {
2795         //
2796         // sends a mail to the subdetector expert in case of preprocessor error
2797         //
2798         
2799         if (fTestMode != kNone)
2800                 return kTRUE;
2801
2802         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
2803         if (dir == NULL)
2804         {
2805                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
2806                 {
2807                         AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
2808                         return kFALSE;
2809                 }
2810
2811         } else {
2812                 gSystem->FreeDirectory(dir);
2813         }
2814
2815         TString bodyFileName;
2816         bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
2817         gSystem->ExpandPathName(bodyFileName);
2818
2819         ofstream mailBody;
2820         mailBody.open(bodyFileName, ofstream::out);
2821
2822         if (!mailBody.is_open())
2823         {
2824                 AliError(Form("Could not open mail body file %s", bodyFileName.Data()));
2825                 return kFALSE;
2826         }
2827
2828         TString to="";
2829         TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
2830         TObjString *anExpert=0;
2831         while ((anExpert = (TObjString*) iterExperts.Next()))
2832         {
2833                 to += Form("%s,", anExpert->GetName());
2834         }
2835         to.Remove(to.Length()-1);
2836         AliDebug(2, Form("to: %s",to.Data()));
2837
2838         if (to.IsNull()) {
2839                 AliInfo("List of detector responsibles not yet set!");
2840                 return kFALSE;
2841         }
2842
2843         TString cc="alberto.colla@cern.ch";
2844
2845         TString subject = Form("%s Shuttle preprocessor FAILED in run %d !",
2846                                 fCurrentDetector.Data(), GetCurrentRun());
2847         AliDebug(2, Form("subject: %s", subject.Data()));
2848
2849         TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
2850         body += Form("SHUTTLE just detected that your preprocessor "
2851                         "failed processing run %d!!\n\n", GetCurrentRun());
2852         body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n", fCurrentDetector.Data());
2853         body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
2854         body += Form("Find the %s log for the current run on \n\n"
2855                 "\thttp://pcalishuttle01.cern.ch:8880/logs/%s_%d.log \n\n", 
2856                 fCurrentDetector.Data(), fCurrentDetector.Data(), GetCurrentRun());
2857         body += Form("The last 10 lines of %s log file are following:\n\n");
2858
2859         AliDebug(2, Form("Body begin: %s", body.Data()));
2860
2861         mailBody << body.Data();
2862         mailBody.close();
2863         mailBody.open(bodyFileName, ofstream::out | ofstream::app);
2864
2865         TString logFileName = Form("%s/%s_%d.log", GetShuttleLogDir(), fCurrentDetector.Data(), GetCurrentRun());
2866         TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
2867         if (gSystem->Exec(tailCommand.Data()))
2868         {
2869                 mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
2870         }
2871
2872         TString endBody = Form("------------------------------------------------------\n\n");
2873         endBody += Form("In case of problems please contact the SHUTTLE core team.\n\n");
2874         endBody += "Please do not answer this message directly, it is automatically generated.\n\n";
2875         endBody += "Greetings,\n\n \t\t\tthe SHUTTLE\n";
2876
2877         AliDebug(2, Form("Body end: %s", endBody.Data()));
2878
2879         mailBody << endBody.Data();
2880
2881         mailBody.close();
2882
2883         // send mail!
2884         TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
2885                                                 subject.Data(),
2886                                                 cc.Data(),
2887                                                 to.Data(),
2888                                                 bodyFileName.Data());
2889         AliDebug(2, Form("mail command: %s", mailCommand.Data()));
2890
2891         Bool_t result = gSystem->Exec(mailCommand.Data());
2892
2893         return result == 0;
2894 }
2895
2896 //______________________________________________________________________________________________
2897 const char* AliShuttle::GetRunType()
2898 {
2899         //
2900         // returns run type read from "run type" logbook
2901         //
2902
2903         if(!fLogbookEntry) {
2904                 AliError("No logbook entry!");
2905                 return 0;
2906         }
2907
2908         return fLogbookEntry->GetRunType();
2909 }
2910
2911 //______________________________________________________________________________________________
2912 Bool_t AliShuttle::GetHLTStatus()
2913 {
2914         // Return HLT status (ON=1 OFF=0)
2915         // Converts the HLT status from the status string read in the run logbook (not just a bool)
2916
2917         if(!fLogbookEntry) {
2918                 AliError("No logbook entry!");
2919                 return 0;
2920         }
2921
2922         // TODO implement when HLTStatus is inserted in run logbook
2923         //TString hltStatus = fLogbookEntry->GetRunParameter("HLTStatus");
2924         //if(hltStatus == "OFF") {return kFALSE};
2925
2926         return kTRUE;
2927 }
2928
2929 //______________________________________________________________________________________________
2930 void AliShuttle::SetShuttleTempDir(const char* tmpDir)
2931 {
2932         //
2933         // sets Shuttle temp directory
2934         //
2935
2936         fgkShuttleTempDir = gSystem->ExpandPathName(tmpDir);
2937 }
2938
2939 //______________________________________________________________________________________________
2940 void AliShuttle::SetShuttleLogDir(const char* logDir)
2941 {
2942         //
2943         // sets Shuttle log directory
2944         //
2945
2946         fgkShuttleLogDir = gSystem->ExpandPathName(logDir);
2947 }