]> git.uio.no Git - u/mrichter/AliRoot.git/blob - SHUTTLE/AliShuttle.cxx
AliDCSClient "multiSplit" option added in the DCS configuration
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttle.cxx
1 /**************************************************************************
2  * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
3  *                                                                        *
4  * Author: The ALICE Off-line Project.                                    *
5  * Contributors are mentioned in the code where appropriate.              *
6  *                                                                        *
7  * Permission to use, copy, modify and distribute this software and its   *
8  * documentation strictly for non-commercial purposes is hereby granted   *
9  * without fee, provided that the above copyright notice appears in all   *
10  * copies and that both the copyright notice and this permission notice   *
11  * appear in the supporting documentation. The authors make no claims     *
12  * about the suitability of this software for any purpose. It is          *
13  * provided "as is" without express or implied warranty.                  *
14  **************************************************************************/
15
16 /*
17 $Log$
18 Revision 1.57  2007/09/27 16:53:13  acolla
19 Detectors can have more than one AMANDA server. SHUTTLE queries the servers sequentially,
20 merges the dcs aliases/DPs in one TMap and sends it to the preprocessor.
21
22 Revision 1.56  2007/09/14 16:46:14  jgrosseo
23 1) Connect and Close are called before and after each query, so one can
24 keep the same AliDCSClient object.
25 2) The splitting of a query is moved to GetDPValues/GetAliasValues.
26 3) Splitting interval can be specified in constructor
27
28 Revision 1.55  2007/08/06 12:26:40  acolla
29 Function Bool_t GetHLTStatus added to preprocessor. It returns the status of HLT
30 read from the run logbook.
31
32 Revision 1.54  2007/07/12 09:51:25  jgrosseo
33 removed duplicated log message in GetFile
34
35 Revision 1.53  2007/07/12 09:26:28  jgrosseo
36 updating hlt fxs base path
37
38 Revision 1.52  2007/07/12 08:06:45  jgrosseo
39 adding log messages in getfile... functions
40 adding not implemented copy constructor in alishuttleconfigholder
41
42 Revision 1.51  2007/07/03 17:24:52  acolla
43 root moved to v5-16-00. TFileMerger->Cp moved to TFile::Cp.
44
45 Revision 1.50  2007/07/02 17:19:32  acolla
46 preprocessor is run in a temp directory that is removed when process is finished.
47
48 Revision 1.49  2007/06/29 10:45:06  acolla
49 Number of columns in MySql Shuttle logbook increased by one (HLT added)
50
51 Revision 1.48  2007/06/21 13:06:19  acolla
52 GetFileSources returns dummy list with 1 source if system=DCS (better than
53 returning error as it was)
54
55 Revision 1.47  2007/06/19 17:28:56  acolla
56 HLT updated; missing map bug removed.
57
58 Revision 1.46  2007/06/09 13:01:09  jgrosseo
59 Switching to retrieval of several DCS DPs at a time (multiDPrequest)
60
61 Revision 1.45  2007/05/30 06:35:20  jgrosseo
62 Adding functionality to the Shuttle/TestShuttle:
63 o) Function to retrieve list of sources from a given system (GetFileSources with id=0)
64 o) Function to retrieve list of IDs for a given source      (GetFileIDs)
65 These functions are needed for dealing with the tag files that are saved for the GRP preprocessor
66 Example code has been added to the TestProcessor in TestShuttle
67
68 Revision 1.44  2007/05/11 16:09:32  acolla
69 Reference files for ITS, MUON and PHOS are now stored in OfflineDetName/OnlineDetName/run_...
70 example: ITS/SPD/100_filename.root
71
72 Revision 1.43  2007/05/10 09:59:51  acolla
73 Various bug fixes in StoreRefFilesToGrid; Cleaning of reference storage before processing detector (CleanReferenceStorage)
74
75 Revision 1.42  2007/05/03 08:01:39  jgrosseo
76 typo in last commit :-(
77
78 Revision 1.41  2007/05/03 08:00:48  jgrosseo
79 fixing log message when pp want to skip dcs value retrieval
80
81 Revision 1.40  2007/04/27 07:06:48  jgrosseo
82 GetFileSources returns empty list in case of no files, but successful query
83 No mails sent in testmode
84
85 Revision 1.39  2007/04/17 12:43:57  acolla
86 Correction in StoreOCDB; change of text in mail to detector expert
87
88 Revision 1.38  2007/04/12 08:26:18  jgrosseo
89 updated comment
90
91 Revision 1.37  2007/04/10 16:53:14  jgrosseo
92 redirecting sub detector stdout, stderr to sub detector log file
93
94 Revision 1.35  2007/04/04 16:26:38  acolla
95 1. Re-organization of function calls in TestPreprocessor to make it more meaningful.
96 2. Added missing dependency in test preprocessors.
97 3. in AliShuttle.cxx: processing time and memory consumption info on a single line.
98
99 Revision 1.34  2007/04/04 10:33:36  jgrosseo
100 1) Storing of files to the Grid is now done _after_ your preprocessors succeeded. This is transparent, which means that you can still use the same functions (Store, StoreReferenceData) to store files to the Grid. However, the Shuttle first stores them locally and transfers them after the preprocessor finished. The return code of these two functions has changed from UInt_t to Bool_t which gives you the success of the storing.
101 In case of an error with the Grid, the Shuttle will retry the storing later, the preprocessor does not need to be run again.
102
103 2) The meaning of the return code of the preprocessor has changed. 0 is now success and any other value means failure. This value is stored in the log and you can use it to keep details about the error condition.
104
105 3) New function StoreReferenceFile to _directly_ store a file (without opening it) to the reference storage.
106
107 4) The memory usage of the preprocessor is monitored. If it exceeds 2 GB it is terminated.
108
109 5) New function AliPreprocessor::ProcessDCS(). If you do not need to have DCS data in all cases, you can skip the processing by implemting this function and returning kFALSE under certain conditions. E.g. if there is a certain run type.
110 If you always need DCS data (like before), you do not need to implement it.
111
112 6) The run type has been added to the monitoring page
113
114 Revision 1.33  2007/04/03 13:56:01  acolla
115 Grid Storage at the end of preprocessing. Added virtual method to disable DCS query according to the
116 run type.
117
118 Revision 1.32  2007/02/28 10:41:56  acolla
119 Run type field added in SHUTTLE framework. Run type is read from "run type" logbook and retrieved by
120 AliPreprocessor::GetRunType() function.
121 Added some ldap definition files.
122
123 Revision 1.30  2007/02/13 11:23:21  acolla
124 Moved getters and setters of Shuttle's main OCDB/Reference, local
125 OCDB/Reference, temp and log folders to AliShuttleInterface
126
127 Revision 1.27  2007/01/30 17:52:42  jgrosseo
128 adding monalisa monitoring
129
130 Revision 1.26  2007/01/23 19:20:03  acolla
131 Removed old ldif files, added TOF, MCH ldif files. Added some options in
132 AliShuttleConfig::Print. Added in Ali Shuttle: SetShuttleTempDir and
133 SetShuttleLogDir
134
135 Revision 1.25  2007/01/15 19:13:52  acolla
136 Moved some AliInfo to AliDebug in SendMail function
137
138 Revision 1.21  2006/12/07 08:51:26  jgrosseo
139 update (alberto):
140 table, db names in ldap configuration
141 added GRP preprocessor
142 DCS data can also be retrieved by data point
143
144 Revision 1.20  2006/11/16 16:16:48  jgrosseo
145 introducing strict run ordering flag
146 removed giving preprocessor name to preprocessor, they have to know their name themselves ;-)
147
148 Revision 1.19  2006/11/06 14:23:04  jgrosseo
149 major update (Alberto)
150 o) reading of run parameters from the logbook
151 o) online offline naming conversion
152 o) standalone DCSclient package
153
154 Revision 1.18  2006/10/20 15:22:59  jgrosseo
155 o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
156 o) Merging Collect, CollectAll, CollectNew function
157 o) Removing implementation of empty copy constructors (declaration still there!)
158
159 Revision 1.17  2006/10/05 16:20:55  jgrosseo
160 adapting to new CDB classes
161
162 Revision 1.16  2006/10/05 15:46:26  jgrosseo
163 applying to the new interface
164
165 Revision 1.15  2006/10/02 16:38:39  jgrosseo
166 update (alberto):
167 fixed memory leaks
168 storing of objects that failed to be stored to the grid before
169 interfacing of shuttle status table in daq system
170
171 Revision 1.14  2006/08/29 09:16:05  jgrosseo
172 small update
173
174 Revision 1.13  2006/08/15 10:50:00  jgrosseo
175 effc++ corrections (alberto)
176
177 Revision 1.12  2006/08/08 14:19:29  jgrosseo
178 Update to shuttle classes (Alberto)
179
180 - Possibility to set the full object's path in the Preprocessor's and
181 Shuttle's  Store functions
182 - Possibility to extend the object's run validity in the same classes
183 ("startValidity" and "validityInfinite" parameters)
184 - Implementation of the StoreReferenceData function to store reference
185 data in a dedicated CDB storage.
186
187 Revision 1.11  2006/07/21 07:37:20  jgrosseo
188 last run is stored after each run
189
190 Revision 1.10  2006/07/20 09:54:40  jgrosseo
191 introducing status management: The processing per subdetector is divided into several steps,
192 after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
193 can keep track of the number of failures and skips further processing after a certain threshold is
194 exceeded. These thresholds can be configured in LDAP.
195
196 Revision 1.9  2006/07/19 10:09:55  jgrosseo
197 new configuration, accesst to DAQ FES (Alberto)
198
199 Revision 1.8  2006/07/11 12:44:36  jgrosseo
200 adding parameters for extended validity range of data produced by preprocessor
201
202 Revision 1.7  2006/07/10 14:37:09  jgrosseo
203 small fix + todo comment
204
205 Revision 1.6  2006/07/10 13:01:41  jgrosseo
206 enhanced storing of last sucessfully processed run (alberto)
207
208 Revision 1.5  2006/07/04 14:59:57  jgrosseo
209 revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
210
211 Revision 1.4  2006/06/12 09:11:16  jgrosseo
212 coding conventions (Alberto)
213
214 Revision 1.3  2006/06/06 14:26:40  jgrosseo
215 o) removed files that were moved to STEER
216 o) shuttle updated to follow the new interface (Alberto)
217
218 Revision 1.2  2006/03/07 07:52:34  hristov
219 New version (B.Yordanov)
220
221 Revision 1.6  2005/11/19 17:19:14  byordano
222 RetrieveDATEEntries and RetrieveConditionsData added
223
224 Revision 1.5  2005/11/19 11:09:27  byordano
225 AliShuttle declaration added
226
227 Revision 1.4  2005/11/17 17:47:34  byordano
228 TList changed to TObjArray
229
230 Revision 1.3  2005/11/17 14:43:23  byordano
231 import to local CVS
232
233 Revision 1.1.1.1  2005/10/28 07:33:58  hristov
234 Initial import as subdirectory in AliRoot
235
236 Revision 1.2  2005/09/13 08:41:15  byordano
237 default startTime endTime added
238
239 Revision 1.4  2005/08/30 09:13:02  byordano
240 some docs added
241
242 Revision 1.3  2005/08/29 21:15:47  byordano
243 some docs added
244
245 */
246
247 //
248 // This class is the main manager for AliShuttle.
249 // It organizes the data retrieval from DCS and calls the
250 // interface methods of AliPreprocessor.
251 // For every detector in AliShuttleConfig (see AliShuttleConfig),
252 // data for its set of aliases is retrieved. If there is a registered
253 // AliPreprocessor for this detector then it will be used
254 // according to the schema (see AliPreprocessor).
255 // If there isn't a registered AliPreprocessor then the retrieved
256 // data is stored automatically to the underlying AliCDBStorage.
257 // The alias name is used for detSpec.
258 //
259
260 #include "AliShuttle.h"
261
262 #include "AliCDBManager.h"
263 #include "AliCDBStorage.h"
264 #include "AliCDBId.h"
265 #include "AliCDBRunRange.h"
266 #include "AliCDBPath.h"
267 #include "AliCDBEntry.h"
268 #include "AliShuttleConfig.h"
269 #include "DCSClient/AliDCSClient.h"
270 #include "AliLog.h"
271 #include "AliPreprocessor.h"
272 #include "AliShuttleStatus.h"
273 #include "AliShuttleLogbookEntry.h"
274
275 #include <TSystem.h>
276 #include <TObject.h>
277 #include <TString.h>
278 #include <TTimeStamp.h>
279 #include <TObjString.h>
280 #include <TSQLServer.h>
281 #include <TSQLResult.h>
282 #include <TSQLRow.h>
283 #include <TMutex.h>
284 #include <TSystemDirectory.h>
285 #include <TSystemFile.h>
286 #include <TFile.h>
287 #include <TGrid.h>
288 #include <TGridResult.h>
289
290 #include <TMonaLisaWriter.h>
291
292 #include <fstream>
293
294 #include <sys/types.h>
295 #include <sys/wait.h>
296
297 ClassImp(AliShuttle)
298
299 //______________________________________________________________________________________________
300 AliShuttle::AliShuttle(const AliShuttleConfig* config,
301                 UInt_t timeout, Int_t retries):
302 fConfig(config),
303 fTimeout(timeout), fRetries(retries),
304 fPreprocessorMap(),
305 fLogbookEntry(0),
306 fCurrentDetector(),
307 fStatusEntry(0),
308 fMonitoringMutex(0),
309 fLastActionTime(0),
310 fLastAction(),
311 fMonaLisa(0),
312 fTestMode(kNone),
313 fReadTestMode(kFALSE),
314 fOutputRedirected(kFALSE)
315 {
316         //
317         // config: AliShuttleConfig used
318         // timeout: timeout used for AliDCSClient connection
319         // retries: the number of retries in case of connection error.
320         //
321
322         if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
323         for(int iSys=0;iSys<4;iSys++) {
324                 fServer[iSys]=0;
325                 if (iSys < 3)
326                         fFXSlist[iSys].SetOwner(kTRUE);
327         }
328         fPreprocessorMap.SetOwner(kTRUE);
329
330         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
331                 fFirstUnprocessed[iDet] = kFALSE;
332
333         fMonitoringMutex = new TMutex();
334 }
335
336 //______________________________________________________________________________________________
337 AliShuttle::~AliShuttle()
338 {
339         //
340         // destructor
341         //
342
343         fPreprocessorMap.DeleteAll();
344         for(int iSys=0;iSys<4;iSys++)
345                 if(fServer[iSys]) {
346                         fServer[iSys]->Close();
347                         delete fServer[iSys];
348                         fServer[iSys] = 0;
349                 }
350
351         if (fStatusEntry){
352                 delete fStatusEntry;
353                 fStatusEntry = 0;
354         }
355         
356         if (fMonitoringMutex) 
357         {
358                 delete fMonitoringMutex;
359                 fMonitoringMutex = 0;
360         }
361 }
362
363 //______________________________________________________________________________________________
364 void AliShuttle::RegisterPreprocessor(AliPreprocessor* preprocessor)
365 {
366         //
367         // Registers new AliPreprocessor.
368         // It uses GetName() for indentificator of the pre processor.
369         // The pre processor is registered it there isn't any other
370         // with the same identificator (GetName()).
371         //
372
373         const char* detName = preprocessor->GetName();
374         if(GetDetPos(detName) < 0)
375                 AliFatal(Form("********** !!!!! Invalid detector name: %s !!!!! **********", detName));
376
377         if (fPreprocessorMap.GetValue(detName)) {
378                 AliWarning(Form("AliPreprocessor %s is already registered!", detName));
379                 return;
380         }
381
382         fPreprocessorMap.Add(new TObjString(detName), preprocessor);
383 }
384 //______________________________________________________________________________________________
385 Bool_t AliShuttle::Store(const AliCDBPath& path, TObject* object,
386                 AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite)
387 {
388         // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for
389         // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored
390         // using this function. Use StoreReferenceData instead!
391         // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
392         // finishes the data are transferred to the main storage (Grid).
393
394         return StoreLocally(fgkLocalCDB, path, object, metaData, validityStart, validityInfinite);
395 }
396
397 //______________________________________________________________________________________________
398 Bool_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData)
399 {
400         // Stores a CDB object in the storage for reference data. This objects will not be available during
401         // offline reconstrunction. Use this function for reference data only!
402         // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
403         // finishes the data are transferred to the main storage (Grid).
404
405         return StoreLocally(fgkLocalRefStorage, path, object, metaData);
406 }
407
408 //______________________________________________________________________________________________
409 Bool_t AliShuttle::StoreLocally(const TString& localUri,
410                         const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
411                         Int_t validityStart, Bool_t validityInfinite)
412 {
413         // Store object temporarily in local storage. Parameters are passed by Store and StoreReferenceData functions.
414         // when the preprocessor finishes the data are transferred to the main storage (Grid).
415         // The parameters are:
416         //   1) Uri of the backup storage (Local)
417         //   2) the object's path.
418         //   3) the object to be stored
419         //   4) the metaData to be associated with the object
420         //   5) the validity start run number w.r.t. the current run,
421         //      if the data is valid only for this run leave the default 0
422         //   6) specifies if the calibration data is valid for infinity (this means until updated),
423         //      typical for calibration runs, the default is kFALSE
424         //
425         // returns 0 if fail, 1 otherwise
426
427         if (fTestMode & kErrorStorage)
428         {
429                 Log(fCurrentDetector, "StoreLocally - In TESTMODE - Simulating error while storing locally");
430                 return kFALSE;
431         }
432         
433         const char* cdbType = (localUri == fgkLocalCDB) ? "CDB" : "Reference";
434
435         Int_t firstRun = GetCurrentRun() - validityStart;
436         if(firstRun < 0) {
437                 AliWarning("First valid run happens to be less than 0! Setting it to 0.");
438                 firstRun=0;
439         }
440
441         Int_t lastRun = -1;
442         if(validityInfinite) {
443                 lastRun = AliCDBRunRange::Infinity();
444         } else {
445                 lastRun = GetCurrentRun();
446         }
447
448         // Version is set to current run, it will be used later to transfer data to Grid
449         AliCDBId id(path, firstRun, lastRun, GetCurrentRun(), -1);
450
451         if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){
452                 TObjString runUsed = Form("%d", GetCurrentRun());
453                 metaData->SetProperty("RunUsed(TObjString)", runUsed.Clone());
454         }
455
456         Bool_t result = kFALSE;
457
458         if (!(AliCDBManager::Instance()->GetStorage(localUri))) {
459                 Log("SHUTTLE", Form("StoreLocally - Cannot activate local %s storage", cdbType));
460         } else {
461                 result = AliCDBManager::Instance()->GetStorage(localUri)
462                                         ->Put(object, id, metaData);
463         }
464
465         if(!result) {
466
467                 Log(fCurrentDetector, Form("StoreLocally - Can't store object <%s>!", id.ToString().Data()));
468         }
469
470         return result;
471 }
472
473 //______________________________________________________________________________________________
474 Bool_t AliShuttle::StoreOCDB()
475 {
476         //
477         // Called when preprocessor ends successfully or when previous storage attempt failed (kStoreError status)
478         // Calls underlying StoreOCDB(const char*) function twice, for OCDB and Reference storage.
479         // Then calls StoreRefFilesToGrid to store reference files. 
480         //
481         
482         if (fTestMode & kErrorGrid)
483         {
484                 Log("SHUTTLE", "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
485                 Log(fCurrentDetector, "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
486                 return kFALSE;
487         }
488         
489         Log("SHUTTLE","Storing OCDB data ...");
490         Bool_t resultCDB = StoreOCDB(fgkMainCDB);
491
492         Log("SHUTTLE","Storing reference data ...");
493         Bool_t resultRef = StoreOCDB(fgkMainRefStorage);
494         
495         Log("SHUTTLE","Storing reference files ...");
496         Bool_t resultRefFiles = StoreRefFilesToGrid();
497         
498         return resultCDB && resultRef && resultRefFiles;
499 }
500
501 //______________________________________________________________________________________________
502 Bool_t AliShuttle::StoreOCDB(const TString& gridURI)
503 {
504         //
505         // Called by StoreOCDB(), performs actual storage to the main OCDB and reference storages (Grid)
506         //
507
508         TObjArray* gridIds=0;
509
510         Bool_t result = kTRUE;
511
512         const char* type = 0;
513         TString localURI;
514         if(gridURI == fgkMainCDB) {
515                 type = "OCDB";
516                 localURI = fgkLocalCDB;
517         } else if(gridURI == fgkMainRefStorage) {
518                 type = "reference";
519                 localURI = fgkLocalRefStorage;
520         } else {
521                 AliError(Form("Invalid storage URI: %s", gridURI.Data()));
522                 return kFALSE;
523         }
524
525         AliCDBManager* man = AliCDBManager::Instance();
526
527         AliCDBStorage *gridSto = man->GetStorage(gridURI);
528         if(!gridSto) {
529                 Log("SHUTTLE",
530                         Form("StoreOCDB - cannot activate main %s storage", type));
531                 return kFALSE;
532         }
533
534         gridIds = gridSto->GetQueryCDBList();
535
536         // get objects previously stored in local CDB
537         AliCDBStorage *localSto = man->GetStorage(localURI);
538         if(!localSto) {
539                 Log("SHUTTLE",
540                         Form("StoreOCDB - cannot activate local %s storage", type));
541                 return kFALSE;
542         }
543         AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*");
544         // Local objects were stored with current run as Grid version!
545         TList* localEntries = localSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun());
546         localEntries->SetOwner(1);
547
548         // loop on local stored objects
549         TIter localIter(localEntries);
550         AliCDBEntry *aLocEntry = 0;
551         while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){
552                 aLocEntry->SetOwner(1);
553                 AliCDBId aLocId = aLocEntry->GetId();
554                 aLocEntry->SetVersion(-1);
555                 aLocEntry->SetSubVersion(-1);
556
557                 // If local object is valid up to infinity we store it only if it is
558                 // the first unprocessed run!
559                 if (aLocId.GetLastRun() == AliCDBRunRange::Infinity() &&
560                         !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
561                 {
562                         Log("SHUTTLE", Form("StoreOCDB - %s: object %s has validity infinite but "
563                                                 "there are previous unprocessed runs!",
564                                                 fCurrentDetector.Data(), aLocId.GetPath().Data()));
565                         continue;
566                 }
567
568                 // loop on Grid valid Id's
569                 Bool_t store = kTRUE;
570                 TIter gridIter(gridIds);
571                 AliCDBId* aGridId = 0;
572                 while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
573                         if(aGridId->GetPath() != aLocId.GetPath()) continue;
574                         // skip all objects valid up to infinity
575                         if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
576                         // if we get here, it means there's already some more recent object stored on Grid!
577                         store = kFALSE;
578                         break;
579                 }
580
581                 // If we get here, the file can be stored!
582                 Bool_t storeOk = gridSto->Put(aLocEntry);
583                 if(!store || storeOk){
584
585                         if (!store)
586                         {
587                                 Log(fCurrentDetector.Data(),
588                                         Form("StoreOCDB - A more recent object already exists in %s storage: <%s>",
589                                                 type, aGridId->ToString().Data()));
590                         } else {
591                                 Log("SHUTTLE",
592                                         Form("StoreOCDB - Object <%s> successfully put into %s storage",
593                                                 aLocId.ToString().Data(), type));
594                                 Log(fCurrentDetector.Data(),
595                                         Form("StoreOCDB - Object <%s> successfully put into %s storage",
596                                                 aLocId.ToString().Data(), type));
597                         }
598
599                         // removing local filename...
600                         TString filename;
601                         localSto->IdToFilename(aLocId, filename);
602                         AliInfo(Form("Removing local file %s", filename.Data()));
603                         RemoveFile(filename.Data());
604                         continue;
605                 } else  {
606                         Log("SHUTTLE",
607                                 Form("StoreOCDB - Grid %s storage of object <%s> failed",
608                                         type, aLocId.ToString().Data()));
609                         Log(fCurrentDetector.Data(),
610                                 Form("StoreOCDB - Grid %s storage of object <%s> failed",
611                                         type, aLocId.ToString().Data()));
612                         result = kFALSE;
613                 }
614         }
615         localEntries->Clear();
616
617         return result;
618 }
619
620 //______________________________________________________________________________________________
621 Bool_t AliShuttle::CleanReferenceStorage(const char* detector)
622 {
623         // clears the directory used to store reference files of a given subdetector
624   
625         AliCDBManager* man = AliCDBManager::Instance();
626         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
627         TString localBaseFolder = sto->GetBaseFolder();
628
629         TString targetDir = GetRefFilePrefix(localBaseFolder.Data(), detector);
630         
631         Log("SHUTTLE", Form("Cleaning %s", targetDir.Data()));
632
633         TString begin;
634         begin.Form("%d_", GetCurrentRun());
635         
636         TSystemDirectory* baseDir = new TSystemDirectory("/", targetDir);
637         if (!baseDir)
638                 return kTRUE;
639                 
640         TList* dirList = baseDir->GetListOfFiles();
641         delete baseDir;
642         
643         if (!dirList) return kTRUE;
644                         
645         if (dirList->GetEntries() < 3) 
646         {
647                 delete dirList;
648                 return kTRUE;
649         }
650                                 
651         Int_t nDirs = 0, nDel = 0;
652         TIter dirIter(dirList);
653         TSystemFile* entry = 0;
654
655         Bool_t success = kTRUE;
656         
657         while ((entry = dynamic_cast<TSystemFile*> (dirIter.Next())))
658         {                                       
659                 if (entry->IsDirectory())
660                         continue;
661                 
662                 TString fileName(entry->GetName());
663                 if (!fileName.BeginsWith(begin))
664                         continue;
665                         
666                 nDirs++;
667                                                 
668                 // delete file
669                 Int_t result = gSystem->Unlink(fileName.Data());
670                 
671                 if (result)
672                 {
673                         Log("SHUTTLE", Form("Could not delete file %s!", fileName.Data()));
674                         success = kFALSE;
675                 } else {
676                         nDel++;
677                 }
678         }
679
680         if(nDirs > 0)
681                 Log("SHUTTLE", Form("CleanReferenceStorage - %d (over %d) reference files in folder %s were deleted.", 
682                         nDel, nDirs, targetDir.Data()));
683
684                 
685         delete dirList;
686         return success;
687
688
689
690
691
692
693   Int_t result = gSystem->GetPathInfo(targetDir, 0, (Long64_t*) 0, 0, 0);
694   if (result == 0)
695   {
696     // delete directory
697     result = gSystem->Exec(Form("rm -r %s", targetDir.Data()));
698     if (result != 0)
699     {  
700       Log("SHUTTLE", Form("StoreReferenceFile - Could not clear directory %s", targetDir.Data()));
701       return kFALSE;
702     }
703   }
704
705   result = gSystem->mkdir(targetDir, kTRUE);
706   if (result != 0)
707   {
708     Log("SHUTTLE", Form("StoreReferenceFile - Error creating base directory %s", targetDir.Data()));
709     return kFALSE;
710   }
711         
712   return kTRUE;
713 }
714
715 //______________________________________________________________________________________________
716 Bool_t AliShuttle::StoreReferenceFile(const char* detector, const char* localFile, const char* gridFileName)
717 {
718         //
719         // Stores reference file directly (without opening it). This function stores the file locally.
720         //
721         // The file is stored under the following location: 
722         // <base folder of local reference storage>/<DET>/<RUN#>_<gridFileName>
723         // where <gridFileName> is the second parameter given to the function
724         // 
725         
726         if (fTestMode & kErrorStorage)
727         {
728                 Log(fCurrentDetector, "StoreReferenceFile - In TESTMODE - Simulating error while storing locally");
729                 return kFALSE;
730         }
731         
732         AliCDBManager* man = AliCDBManager::Instance();
733         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
734         
735         TString localBaseFolder = sto->GetBaseFolder();
736         
737         TString targetDir = GetRefFilePrefix(localBaseFolder.Data(), detector); 
738         
739         //try to open folder, if does not exist
740         void* dir = gSystem->OpenDirectory(targetDir.Data());
741         if (dir == NULL) {
742                 if (gSystem->mkdir(targetDir.Data(), kTRUE)) {
743                         Log("SHUTTLE", Form("Can't open directory <%s>", targetDir.Data()));
744                         return kFALSE;
745                 }
746
747         } else {
748                 gSystem->FreeDirectory(dir);
749         }
750
751         TString target;
752         target.Form("%s/%d_%s", targetDir.Data(), GetCurrentRun(), gridFileName);
753         
754         Int_t result = gSystem->GetPathInfo(localFile, 0, (Long64_t*) 0, 0, 0);
755         if (result)
756         {
757                 Log("SHUTTLE", Form("StoreReferenceFile - %s does not exist", localFile));
758                 return kFALSE;
759         }
760
761         result = gSystem->CopyFile(localFile, target);
762
763         if (result == 0)
764         {
765                 Log("SHUTTLE", Form("StoreReferenceFile - File %s stored locally to %s", localFile, target.Data()));
766                 return kTRUE;
767         }
768         else
769         {
770                 Log("SHUTTLE", Form("StoreReferenceFile - Could not store file %s to %s!. Error code = %d", 
771                                 localFile, target.Data(), result));
772                 return kFALSE;
773         }       
774 }
775
776 //______________________________________________________________________________________________
777 Bool_t AliShuttle::StoreRefFilesToGrid()
778 {
779         //
780         // Transfers the reference file to the Grid.
781         //
782         // The files are stored under the following location: 
783         // <base folder of reference storage>/<DET>/<RUN#>_<gridFileName>
784         //
785         
786         AliCDBManager* man = AliCDBManager::Instance();
787         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
788         if (!sto)
789                 return kFALSE;
790         TString localBaseFolder = sto->GetBaseFolder();
791                 
792         TString dir = GetRefFilePrefix(localBaseFolder.Data(), fCurrentDetector.Data());
793                 
794         AliCDBStorage* gridSto = man->GetStorage(fgkMainRefStorage);
795         if (!gridSto)
796                 return kFALSE;
797         
798         TString gridBaseFolder = gridSto->GetBaseFolder();
799
800         TString alienDir = GetRefFilePrefix(gridBaseFolder.Data(), fCurrentDetector.Data());
801         
802         TString begin;
803         begin.Form("%d_", GetCurrentRun());
804         
805         TSystemDirectory* baseDir = new TSystemDirectory("/", dir);
806         if (!baseDir)
807                 return kTRUE;
808                 
809         TList* dirList = baseDir->GetListOfFiles();
810         delete baseDir;
811         
812         if (!dirList) return kTRUE;
813                 
814         if (dirList->GetEntries() < 3) 
815         {
816                 delete dirList;
817                 return kTRUE;
818         }
819                         
820         if (!gGrid)
821         { 
822                 Log("SHUTTLE", "Connection to Grid failed: Cannot continue!");
823                 delete dirList;
824                 return kFALSE;
825         }
826         
827         Int_t nDirs = 0, nTransfer = 0;
828         TIter dirIter(dirList);
829         TSystemFile* entry = 0;
830
831         Bool_t success = kTRUE;
832         Bool_t first = kTRUE;
833         
834         while ((entry = dynamic_cast<TSystemFile*> (dirIter.Next())))
835         {                       
836                 if (entry->IsDirectory())
837                         continue;
838                         
839                 TString fileName(entry->GetName());
840                 if (!fileName.BeginsWith(begin))
841                         continue;
842                         
843                 nDirs++;
844                         
845                 if (first)
846                 {
847                         first = kFALSE;
848                         // check that DET folder exists, otherwise create it
849                         TGridResult* result = gGrid->Ls(alienDir.Data(), "a");
850                         
851                         if (!result)
852                         {
853                                 delete dirList;
854                                 return kFALSE;
855                         }
856                         
857                         if (!result->GetFileName(1)) // TODO: It looks like element 0 is always 0!!
858                         {
859                                 if (!gGrid->Mkdir(alienDir.Data(),"",0))
860                                 {
861                                         Log("SHUTTLE", Form("StoreRefFilesToGrid - Cannot create directory %s",
862                                                         alienDir.Data()));
863                                         delete dirList;
864                                         return kFALSE;
865                                 } else {
866                                         Log("SHUTTLE",Form("Folder %s created", alienDir.Data()));
867                                 }
868                                 
869                         } else {
870                                         Log("SHUTTLE",Form("Folder %s found", alienDir.Data()));
871                         }
872                 }
873                         
874                 TString fullLocalPath;
875                 fullLocalPath.Form("%s/%s", dir.Data(), fileName.Data());
876                 
877                 TString fullGridPath;
878                 fullGridPath.Form("alien://%s/%s", alienDir.Data(), fileName.Data());
879
880                 Bool_t result = TFile::Cp(fullLocalPath, fullGridPath);
881                 
882                 if (result)
883                 {
884                         Log("SHUTTLE", Form("StoreRefFilesToGrid - Copying local file %s to %s succeeded!", fullLocalPath.Data(), fullGridPath.Data()));
885                         RemoveFile(fullLocalPath);
886                         nTransfer++;
887                 }
888                 else
889                 {
890                         Log("SHUTTLE", Form("StoreRefFilesToGrid - Copying local file %s to %s FAILED!", fullLocalPath.Data(), fullGridPath.Data()));
891                         success = kFALSE;
892                 }
893         }
894
895         Log("SHUTTLE", Form("StoreRefFilesToGrid - %d (over %d) reference files in folder %s copied to Grid.", nTransfer, nDirs, dir.Data()));
896
897                 
898         delete dirList;
899         return success;
900 }
901
902 //______________________________________________________________________________________________
903 const char* AliShuttle::GetRefFilePrefix(const char* base, const char* detector)
904 {
905         //
906         // Get folder name of reference files 
907         //
908
909         TString offDetStr(GetOfflineDetName(detector));
910         TString dir;
911         if (offDetStr == "ITS" || offDetStr == "MUON" || offDetStr == "PHOS")
912         {
913                 dir.Form("%s/%s/%s", base, offDetStr.Data(), detector);
914         } else {
915                 dir.Form("%s/%s", base, offDetStr.Data());
916         }
917         
918         return dir.Data();
919         
920
921 }
922 //______________________________________________________________________________________________
923 void AliShuttle::CleanLocalStorage(const TString& uri)
924 {
925         //
926         // Called in case the preprocessor is declared failed. Remove remaining objects from the local storages.
927         //
928
929         const char* type = 0;
930         if(uri == fgkLocalCDB) {
931                 type = "OCDB";
932         } else if(uri == fgkLocalRefStorage) {
933                 type = "Reference";
934         } else {
935                 AliError(Form("Invalid storage URI: %s", uri.Data()));
936                 return;
937         }
938
939         AliCDBManager* man = AliCDBManager::Instance();
940
941         // open local storage
942         AliCDBStorage *localSto = man->GetStorage(uri);
943         if(!localSto) {
944                 Log("SHUTTLE",
945                         Form("CleanLocalStorage - cannot activate local %s storage", type));
946                 return;
947         }
948
949         TString filename(Form("%s/%s/*/Run*_v%d_s*.root",
950                 localSto->GetBaseFolder().Data(), GetOfflineDetName(fCurrentDetector.Data()), GetCurrentRun()));
951
952         AliInfo(Form("filename = %s", filename.Data()));
953
954         AliInfo(Form("Removing remaining local files from run %d and detector %s ...",
955                 GetCurrentRun(), fCurrentDetector.Data()));
956
957         RemoveFile(filename.Data());
958
959 }
960
961 //______________________________________________________________________________________________
962 void AliShuttle::RemoveFile(const char* filename)
963 {
964         //
965         // removes local file
966         //
967
968         TString command(Form("rm -f %s", filename));
969
970         Int_t result = gSystem->Exec(command.Data());
971         if(result != 0)
972         {
973                 Log("SHUTTLE", Form("RemoveFile - %s: Cannot remove file %s!",
974                         fCurrentDetector.Data(), filename));
975         }
976 }
977
978 //______________________________________________________________________________________________
979 AliShuttleStatus* AliShuttle::ReadShuttleStatus()
980 {
981         //
982         // Reads the AliShuttleStatus from the CDB
983         //
984
985         if (fStatusEntry){
986                 delete fStatusEntry;
987                 fStatusEntry = 0;
988         }
989
990         fStatusEntry = AliCDBManager::Instance()->GetStorage(GetLocalCDB())
991                 ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
992
993         if (!fStatusEntry) return 0;
994         fStatusEntry->SetOwner(1);
995
996         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
997         if (!status) {
998                 AliError("Invalid object stored to CDB!");
999                 return 0;
1000         }
1001
1002         return status;
1003 }
1004
1005 //______________________________________________________________________________________________
1006 Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status)
1007 {
1008         //
1009         // writes the status for one subdetector
1010         //
1011
1012         if (fStatusEntry){
1013                 delete fStatusEntry;
1014                 fStatusEntry = 0;
1015         }
1016
1017         Int_t run = GetCurrentRun();
1018
1019         AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
1020
1021         fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
1022         fStatusEntry->SetOwner(1);
1023
1024         UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
1025
1026         if (!result) {
1027                 Log("SHUTTLE", Form("WriteShuttleStatus - Failed for %s, run %d",
1028                                                 fCurrentDetector.Data(), run));
1029                 return kFALSE;
1030         }
1031         
1032         SendMLInfo();
1033
1034         return kTRUE;
1035 }
1036
1037 //______________________________________________________________________________________________
1038 void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount)
1039 {
1040         //
1041         // changes the AliShuttleStatus for the given detector and run to the given status
1042         //
1043
1044         if (!fStatusEntry){
1045                 AliError("UNEXPECTED: fStatusEntry empty");
1046                 return;
1047         }
1048
1049         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1050
1051         if (!status){
1052                 Log("SHUTTLE", "UNEXPECTED: status could not be read from current CDB entry");
1053                 return;
1054         }
1055
1056         TString actionStr = Form("UpdateShuttleStatus - %s: Changing state from %s to %s",
1057                                 fCurrentDetector.Data(),
1058                                 status->GetStatusName(),
1059                                 status->GetStatusName(newStatus));
1060         Log("SHUTTLE", actionStr);
1061         SetLastAction(actionStr);
1062
1063         status->SetStatus(newStatus);
1064         if (increaseCount) status->IncreaseCount();
1065
1066         AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
1067
1068         SendMLInfo();
1069 }
1070
1071 //______________________________________________________________________________________________
1072 void AliShuttle::SendMLInfo()
1073 {
1074         //
1075         // sends ML information about the current status of the current detector being processed
1076         //
1077         
1078         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1079         
1080         if (!status){
1081                 Log("SHUTTLE", "SendMLInfo - UNEXPECTED: status could not be read from current CDB entry");
1082                 return;
1083         }
1084         
1085         TMonaLisaText  mlStatus(Form("%s_status", fCurrentDetector.Data()), status->GetStatusName());
1086         TMonaLisaValue mlRetryCount(Form("%s_count", fCurrentDetector.Data()), status->GetCount());
1087
1088         TList mlList;
1089         mlList.Add(&mlStatus);
1090         mlList.Add(&mlRetryCount);
1091
1092         fMonaLisa->SendParameters(&mlList);
1093 }
1094
1095 //______________________________________________________________________________________________
1096 Bool_t AliShuttle::ContinueProcessing()
1097 {
1098         // this function reads the AliShuttleStatus information from CDB and
1099         // checks if the processing should be continued
1100         // if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
1101
1102         if (!fConfig->HostProcessDetector(fCurrentDetector)) return kFALSE;
1103
1104         AliPreprocessor* aPreprocessor =
1105                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
1106         if (!aPreprocessor)
1107         {
1108                 AliInfo(Form("%s: no preprocessor registered", fCurrentDetector.Data()));
1109                 return kFALSE;
1110         }
1111
1112         AliShuttleLogbookEntry::Status entryStatus =
1113                 fLogbookEntry->GetDetectorStatus(fCurrentDetector);
1114
1115         if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
1116                 AliInfo(Form("ContinueProcessing - %s is %s",
1117                                 fCurrentDetector.Data(),
1118                                 fLogbookEntry->GetDetectorStatusName(entryStatus)));
1119                 return kFALSE;
1120         }
1121
1122         // if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state
1123
1124         // check if current run is first unprocessed run for current detector
1125         if (fConfig->StrictRunOrder(fCurrentDetector) &&
1126                 !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
1127         {
1128                 if (fTestMode == kNone)
1129                 {
1130                         Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering but this is not the first unprocessed run!"));
1131                         return kFALSE;
1132                 }
1133                 else
1134                 {
1135                         Log("SHUTTLE", Form("ContinueProcessing - In TESTMODE - Although %s requires strict run ordering and this is not the first unprocessed run, the SHUTTLE continues"));
1136                 }
1137         }
1138
1139         AliShuttleStatus* status = ReadShuttleStatus();
1140         if (!status) {
1141                 // first time
1142                 Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
1143                                 fCurrentDetector.Data()));
1144                 status = new AliShuttleStatus(AliShuttleStatus::kStarted);
1145                 return WriteShuttleStatus(status);
1146         }
1147
1148         // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
1149         // If it happens it may mean Logbook updating failed... let's do it now!
1150         if (status->GetStatus() == AliShuttleStatus::kDone ||
1151             status->GetStatus() == AliShuttleStatus::kFailed){
1152                 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
1153                                         fCurrentDetector.Data(),
1154                                         status->GetStatusName(status->GetStatus())));
1155                 UpdateShuttleLogbook(fCurrentDetector.Data(),
1156                                         status->GetStatusName(status->GetStatus()));
1157                 return kFALSE;
1158         }
1159
1160         if (status->GetStatus() == AliShuttleStatus::kStoreError) {
1161                 Log("SHUTTLE",
1162                         Form("ContinueProcessing - %s: Grid storage of one or more objects failed. Trying again now",
1163                                 fCurrentDetector.Data()));
1164                 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
1165                 if (StoreOCDB()){
1166                         Log("SHUTTLE", Form("ContinueProcessing - %s: all objects successfully stored into main storage",
1167                                 fCurrentDetector.Data()));
1168                         UpdateShuttleStatus(AliShuttleStatus::kDone);
1169                         UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE");
1170                 } else {
1171                         Log("SHUTTLE",
1172                                 Form("ContinueProcessing - %s: Grid storage failed again",
1173                                         fCurrentDetector.Data()));
1174                         UpdateShuttleStatus(AliShuttleStatus::kStoreError);
1175                 }
1176                 return kFALSE;
1177         }
1178
1179         // if we get here, there is a restart
1180         Bool_t cont = kFALSE;
1181
1182         // abort conditions
1183         if (status->GetCount() >= fConfig->GetMaxRetries()) {
1184                 Log("SHUTTLE", Form("ContinueProcessing - %s failed %d times in status %s - "
1185                                 "Updating Shuttle Logbook", fCurrentDetector.Data(),
1186                                 status->GetCount(), status->GetStatusName()));
1187                 UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
1188                 UpdateShuttleStatus(AliShuttleStatus::kFailed);
1189
1190                 // there may still be objects in local OCDB and reference storage
1191                 // and FXS databases may be not updated: do it now!
1192                 
1193                 // TODO Currently disabled, we want to keep files in case of failure!
1194                 // CleanLocalStorage(fgkLocalCDB);
1195                 // CleanLocalStorage(fgkLocalRefStorage);
1196                 // UpdateTableFailCase();
1197                 
1198                 // Send mail to detector expert!
1199                 AliInfo(Form("Sending mail to %s expert...", fCurrentDetector.Data()));
1200                 if (!SendMail())
1201                         Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert",
1202                                         fCurrentDetector.Data()));
1203
1204         } else {
1205                 Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. "
1206                                 "Aborted before with %s. Retry number %d.", fCurrentDetector.Data(),
1207                                 status->GetStatusName(), status->GetCount()));
1208                 Bool_t increaseCount = kTRUE;
1209                 if (status->GetStatus() == AliShuttleStatus::kDCSError || status->GetStatus() == AliShuttleStatus::kDCSStarted)
1210                         increaseCount = kFALSE;
1211                 UpdateShuttleStatus(AliShuttleStatus::kStarted, increaseCount);
1212                 cont = kTRUE;
1213         }
1214
1215         return cont;
1216 }
1217
1218 //______________________________________________________________________________________________
1219 Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
1220 {
1221         //
1222         // Makes data retrieval for all detectors in the configuration.
1223         // entry: Shuttle logbook entry, contains run paramenters and status of detectors
1224         // (Unprocessed, Inactive, Failed or Done).
1225         // Returns kFALSE in case of error occured and kTRUE otherwise
1226         //
1227
1228         if (!entry) return kFALSE;
1229
1230         fLogbookEntry = entry;
1231
1232         AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^* \n",
1233                                         GetCurrentRun()));
1234
1235         // create ML instance that monitors this run
1236         fMonaLisa = new TMonaLisaWriter(Form("%d", GetCurrentRun()), "SHUTTLE", "aliendb1.cern.ch");
1237         // disable monitoring of other parameters that come e.g. from TFile
1238         gMonitoringWriter = 0;
1239
1240         // Send the information to ML
1241         TMonaLisaText  mlStatus("SHUTTLE_status", "Processing");
1242         TMonaLisaText  mlRunType("SHUTTLE_runtype", Form("%s (%s)", entry->GetRunType(), entry->GetRunParameter("log")));
1243
1244         TList mlList;
1245         mlList.Add(&mlStatus);
1246         mlList.Add(&mlRunType);
1247
1248         fMonaLisa->SendParameters(&mlList);
1249
1250         if (fLogbookEntry->IsDone())
1251         {
1252                 Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook");
1253                 UpdateShuttleLogbook("shuttle_done");
1254                 fLogbookEntry = 0;
1255                 return kTRUE;
1256         }
1257
1258         // read test mode if flag is set
1259         if (fReadTestMode)
1260         {
1261                 fTestMode = kNone;
1262                 TString logEntry(entry->GetRunParameter("log"));
1263                 //printf("log entry = %s\n", logEntry.Data());
1264                 TString searchStr("Testmode: ");
1265                 Int_t pos = logEntry.Index(searchStr.Data());
1266                 //printf("%d\n", pos);
1267                 if (pos >= 0)
1268                 {
1269                         TSubString subStr = logEntry(pos + searchStr.Length(), logEntry.Length());
1270                         //printf("%s\n", subStr.String().Data());
1271                         TString newStr(subStr.Data());
1272                         TObjArray* token = newStr.Tokenize(' ');
1273                         if (token)
1274                         {
1275                                 //token->Print();
1276                                 TObjString* tmpStr = dynamic_cast<TObjString*> (token->First());
1277                                 if (tmpStr)
1278                                 {
1279                                         Int_t testMode = tmpStr->String().Atoi();
1280                                         if (testMode > 0)
1281                                         {
1282                                                 Log("SHUTTLE", Form("Enabling test mode %d", testMode));
1283                                                 SetTestMode((TestMode) testMode);
1284                                         }
1285                                 }
1286                                 delete token;          
1287                         }
1288                 }
1289         }
1290         
1291         Log("SHUTTLE", Form("The test mode flag is %d", (Int_t) fTestMode));
1292         
1293         fLogbookEntry->Print("all");
1294
1295         // Initialization
1296         Bool_t hasError = kFALSE;
1297
1298         AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
1299         if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
1300         AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
1301         if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
1302
1303         // Loop on detectors in the configuration
1304         TIter iter(fConfig->GetDetectors());
1305         TObjString* aDetector = 0;
1306
1307         while ((aDetector = (TObjString*) iter.Next()))
1308         {
1309                 fCurrentDetector = aDetector->String();
1310
1311                 if (ContinueProcessing() == kFALSE) continue;
1312
1313                 AliInfo(Form("\n\n \t\t\t****** run %d - %s: START  ******",
1314                                                 GetCurrentRun(), aDetector->GetName()));
1315
1316                 for(Int_t iSys=0;iSys<3;iSys++) fFXSCalled[iSys]=kFALSE;
1317
1318                 Log(fCurrentDetector.Data(), "Starting processing");
1319
1320                 Int_t pid = fork();
1321
1322                 if (pid < 0)
1323                 {
1324                         Log("SHUTTLE", "ERROR: Forking failed");
1325                 }
1326                 else if (pid > 0)
1327                 {
1328                         // parent
1329                         AliInfo(Form("In parent process of %d - %s: Starting monitoring",
1330                                                         GetCurrentRun(), aDetector->GetName()));
1331
1332                         Long_t begin = time(0);
1333
1334                         int status; // to be used with waitpid, on purpose an int (not Int_t)!
1335                         while (waitpid(pid, &status, WNOHANG) == 0)
1336                         {
1337                                 Long_t expiredTime = time(0) - begin;
1338
1339                                 if (expiredTime > fConfig->GetPPTimeOut())
1340                                 {
1341                                         TString tmp;
1342                                         tmp.Form("Process of %s time out. Run time: %d seconds. Killing...",
1343                                                                 fCurrentDetector.Data(), expiredTime);
1344                                         Log("SHUTTLE", tmp);
1345                                         Log(fCurrentDetector, tmp);
1346
1347                                         kill(pid, 9);
1348
1349                                         UpdateShuttleStatus(AliShuttleStatus::kPPTimeOut);
1350                                         hasError = kTRUE;
1351
1352                                         gSystem->Sleep(1000);
1353                                 }
1354                                 else
1355                                 {
1356                                         gSystem->Sleep(1000);
1357                                         
1358                                         TString checkStr;
1359                                         checkStr.Form("ps -o vsize --pid %d | tail -n 1", pid);
1360                                         FILE* pipe = gSystem->OpenPipe(checkStr, "r");
1361                                         if (!pipe)
1362                                         {
1363                                                 Log("SHUTTLE", Form("Error: Could not open pipe to %s", checkStr.Data()));
1364                                                 continue;
1365                                         }
1366                                                 
1367                                         char buffer[100];
1368                                         if (!fgets(buffer, 100, pipe))
1369                                         {
1370                                                 Log("SHUTTLE", "Error: ps did not return anything");
1371                                                 gSystem->ClosePipe(pipe);
1372                                                 continue;
1373                                         }
1374                                         gSystem->ClosePipe(pipe);
1375                                         
1376                                         //Log("SHUTTLE", Form("ps returned %s", buffer));
1377                                         
1378                                         Int_t mem = 0;
1379                                         if ((sscanf(buffer, "%d\n", &mem) != 1) || !mem)
1380                                         {
1381                                                 Log("SHUTTLE", "Error: Could not parse output of ps");
1382                                                 continue;
1383                                         }
1384                                         
1385                                         if (expiredTime % 60 == 0)
1386                                                 Log("SHUTTLE", Form("%s: Checking process. Run time: %d seconds - Memory consumption: %d KB",
1387                                                                 fCurrentDetector.Data(), expiredTime, mem));
1388                                         
1389                                         if (mem > fConfig->GetPPMaxMem())
1390                                         {
1391                                                 TString tmp;
1392                                                 tmp.Form("Process exceeds maximum allowed memory (%d KB > %d KB). Killing...",
1393                                                         mem, fConfig->GetPPMaxMem());
1394                                                 Log("SHUTTLE", tmp);
1395                                                 Log(fCurrentDetector, tmp);
1396         
1397                                                 kill(pid, 9);
1398         
1399                                                 UpdateShuttleStatus(AliShuttleStatus::kPPOutOfMemory);
1400                                                 hasError = kTRUE;
1401         
1402                                                 gSystem->Sleep(1000);
1403                                         }
1404                                 }
1405                         }
1406
1407                         AliInfo(Form("In parent process of %d - %s: Client has terminated.",
1408                                                                 GetCurrentRun(), aDetector->GetName()));
1409
1410                         if (WIFEXITED(status))
1411                         {
1412                                 Int_t returnCode = WEXITSTATUS(status);
1413
1414                                 Log("SHUTTLE", Form("%s: the return code is %d", fCurrentDetector.Data(),
1415                                                                                 returnCode));
1416
1417                                 if (returnCode == 0) hasError = kTRUE;
1418                         }
1419                 }
1420                 else if (pid == 0)
1421                 {
1422                         // client
1423                         AliInfo(Form("In client process of %d - %s", GetCurrentRun(), aDetector->GetName()));
1424
1425                         AliInfo("Redirecting output...");
1426
1427                         if ((freopen(GetLogFileName(fCurrentDetector), "a", stdout)) == 0)
1428                         {
1429                                 Log("SHUTTLE", "Could not freopen stdout");
1430                         }
1431                         else
1432                         {
1433                                 fOutputRedirected = kTRUE;
1434                                 if ((dup2(fileno(stdout), fileno(stderr))) < 0)
1435                                         Log("SHUTTLE", "Could not redirect stderr");
1436                                 
1437                         }
1438                         
1439                         TString wd = gSystem->WorkingDirectory();
1440                         TString tmpDir = Form("%s/%s_process",GetShuttleTempDir(),fCurrentDetector.Data());
1441                         
1442                         gSystem->mkdir(tmpDir.Data());
1443                         gSystem->ChangeDirectory(tmpDir.Data());
1444                         
1445                         Bool_t success = ProcessCurrentDetector();
1446                         
1447                         gSystem->ChangeDirectory(wd.Data());
1448                         
1449                         gSystem->Exec(Form("rm -rf %s",tmpDir.Data()));
1450                         
1451                         if (success) // Preprocessor finished successfully!
1452                         { 
1453                                 // Update time_processed field in FXS DB
1454                                 if (UpdateTable() == kFALSE)
1455                                         Log("SHUTTLE", Form("Process - %s: Could not update FXS databases!", 
1456                                                         fCurrentDetector.Data()));
1457
1458                                 // Transfer the data from local storage to main storage (Grid)
1459                                 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
1460                                 if (StoreOCDB() == kFALSE)
1461                                 {
1462                                         AliInfo(Form("\n \t\t\t****** run %d - %s: STORAGE ERROR ****** \n\n",
1463                                                         GetCurrentRun(), aDetector->GetName()));
1464                                         UpdateShuttleStatus(AliShuttleStatus::kStoreError);
1465                                         success = kFALSE;
1466                                 } else {
1467                                         AliInfo(Form("\n \t\t\t****** run %d - %s: DONE ****** \n\n",
1468                                                         GetCurrentRun(), aDetector->GetName()));
1469                                         UpdateShuttleStatus(AliShuttleStatus::kDone);
1470                                         UpdateShuttleLogbook(fCurrentDetector, "DONE");
1471                                 }
1472                         }
1473
1474                         for (UInt_t iSys=0; iSys<3; iSys++)
1475                         {
1476                                 if (fFXSCalled[iSys]) fFXSlist[iSys].Clear();
1477                         }
1478
1479                         AliInfo(Form("Client process of %d - %s is exiting now with %d.",
1480                                                         GetCurrentRun(), aDetector->GetName(), success));
1481
1482                         // the client exits here
1483                         gSystem->Exit(success);
1484
1485                         AliError("We should never get here!!!");
1486                 }
1487         }
1488
1489         AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^* \n",
1490                                                         GetCurrentRun()));
1491
1492         //check if shuttle is done for this run, if so update logbook
1493         TObjArray checkEntryArray;
1494         checkEntryArray.SetOwner(1);
1495         TString whereClause = Form("where run=%d", GetCurrentRun());
1496         if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || checkEntryArray.GetEntries() == 0) {
1497                 Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!",
1498                                                 GetCurrentRun()));
1499                 return hasError == kFALSE;
1500         }
1501
1502         AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*>
1503                                                 (checkEntryArray.At(0));
1504
1505         if (checkEntry)
1506         {
1507                 if (checkEntry->IsDone())
1508                 {
1509                         Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook");
1510                         UpdateShuttleLogbook("shuttle_done");
1511                 }
1512                 else
1513                 {
1514                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1515                         {
1516                                 if (checkEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
1517                                 {
1518                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
1519                                                         checkEntry->GetRun(), GetDetName(iDet)));
1520                                         fFirstUnprocessed[iDet] = kFALSE;
1521                                 }
1522                         }
1523                 }
1524         }
1525
1526         // remove ML instance
1527         delete fMonaLisa;
1528         fMonaLisa = 0;
1529
1530         fLogbookEntry = 0;
1531
1532         return hasError == kFALSE;
1533 }
1534
1535 //______________________________________________________________________________________________
1536 Bool_t AliShuttle::ProcessCurrentDetector()
1537 {
1538         //
1539         // Makes data retrieval just for a specific detector (fCurrentDetector).
1540         // Threre should be a configuration for this detector.
1541
1542         Log("SHUTTLE", Form("ProcessCurrentDetector - Retrieving values for %s, run %d", 
1543                                                 fCurrentDetector.Data(), GetCurrentRun()));
1544
1545         if (!CleanReferenceStorage(fCurrentDetector.Data()))
1546                 return kFALSE;
1547
1548         TMap* dcsMap = new TMap();
1549
1550         // call preprocessor
1551         AliPreprocessor* aPreprocessor =
1552                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
1553
1554         aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
1555
1556         Bool_t processDCS = aPreprocessor->ProcessDCS();
1557
1558         if (!processDCS)
1559         {
1560                 Log(fCurrentDetector, "ProcessCurrentDetector -"
1561                         " The preprocessor requested to skip the retrieval of DCS values");
1562         }
1563         else if (fTestMode & kSkipDCS)
1564         {
1565                 Log(fCurrentDetector, "ProcessCurrentDetector - In TESTMODE: Skipping DCS processing");
1566         } 
1567         else if (fTestMode & kErrorDCS)
1568         {
1569                 Log(fCurrentDetector, "ProcessCurrentDetector - In TESTMODE: Simulating DCS error");
1570                 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
1571                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1572                 delete dcsMap;
1573                 return kFALSE;
1574         } else {
1575
1576                 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
1577
1578                 // Query DCS archive
1579                 Int_t nServers = fConfig->GetNServers(fCurrentDetector);
1580                 Log("SHUTTLE", Form("ProcessCurrentDetector -"
1581                                 " found %d Amanda servers for %s", nServers, fCurrentDetector.Data()));
1582                 
1583                 for (int iServ=0; iServ<nServers; iServ++)
1584                 {
1585                 
1586                         TString host(fConfig->GetDCSHost(fCurrentDetector, iServ));
1587                         Int_t port = fConfig->GetDCSPort(fCurrentDetector, iServ);
1588                         Int_t multiSplit = fConfig->GetMultiSplit(fCurrentDetector, iServ);
1589
1590                         
1591                         TMap* aliasMap = 0;
1592                         TMap* dpMap = 0;
1593         
1594                         if (fConfig->GetDCSAliases(fCurrentDetector, iServ)->GetEntries() > 0)
1595                         {
1596                                 aliasMap = GetValueSet(host, port, 
1597                                                 fConfig->GetDCSAliases(fCurrentDetector, iServ), 
1598                                                 kAlias, multiSplit);
1599                                 if (!aliasMap)
1600                                 {
1601                                         Log(fCurrentDetector, 
1602                                                 Form("ProcessCurrentDetector -"
1603                                                         " Error retrieving DCS aliases from server %s", 
1604                                                                 host.Data()));
1605                                         UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1606                                         delete dcsMap;
1607                                         return kFALSE;
1608                                 }
1609                         }
1610                         
1611                         if (fConfig->GetDCSDataPoints(fCurrentDetector, iServ)->GetEntries() > 0)
1612                         {
1613                                 dpMap = GetValueSet(host, port, 
1614                                                 fConfig->GetDCSDataPoints(fCurrentDetector, iServ), 
1615                                                 kDP, multiSplit);
1616                                 if (!dpMap)
1617                                 {
1618                                         Log(fCurrentDetector, 
1619                                                 Form("ProcessCurrentDetector -"
1620                                                         " Error retrieving DCS data points from server %s", 
1621                                                                 host.Data()));
1622                                         UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1623                                         if (aliasMap) delete aliasMap;
1624                                         delete dcsMap;
1625                                         return kFALSE;
1626                                 }                               
1627                         }
1628                         
1629                         // merge aliasMap and dpMap into dcsMap
1630                         if(aliasMap) {
1631                                 TIter iter(aliasMap);
1632                                 TObjString* key = 0;
1633                                 while ((key = (TObjString*) iter.Next()))
1634                                         dcsMap->Add(key, aliasMap->GetValue(key->String()));
1635                                 
1636                                 aliasMap->SetOwner(kFALSE);
1637                                 delete aliasMap;
1638                         }       
1639                         
1640                         if(dpMap) {
1641                                 TIter iter(dpMap);
1642                                 TObjString* key = 0;
1643                                 while ((key = (TObjString*) iter.Next()))
1644                                         dcsMap->Add(key, dpMap->GetValue(key->String()));
1645                                 
1646                                 dpMap->SetOwner(kFALSE);
1647                                 delete dpMap;
1648                         }
1649                 }
1650         }
1651         
1652         // DCS Archive DB processing successful. Call Preprocessor!
1653         UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
1654
1655         UInt_t returnValue = aPreprocessor->Process(dcsMap);
1656
1657         if (returnValue > 0) // Preprocessor error!
1658         {
1659                 Log(fCurrentDetector, Form("Preprocessor failed. Process returned %d.", returnValue));
1660                 UpdateShuttleStatus(AliShuttleStatus::kPPError);
1661                 dcsMap->DeleteAll();
1662                 delete dcsMap;
1663                 return kFALSE;
1664         }
1665         
1666         // preprocessor ok!
1667         UpdateShuttleStatus(AliShuttleStatus::kPPDone);
1668         Log(fCurrentDetector, Form("ProcessCurrentDetector - %s preprocessor returned success",
1669                                 fCurrentDetector.Data()));
1670
1671         dcsMap->DeleteAll();
1672         delete dcsMap;
1673
1674         return kTRUE;
1675 }
1676
1677 //______________________________________________________________________________________________
1678 Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
1679                 TObjArray& entries)
1680 {
1681         // Query DAQ's Shuttle logbook and fills detector status object.
1682         // Call QueryRunParameters to query DAQ logbook for run parameters.
1683         //
1684
1685         entries.SetOwner(1);
1686
1687         // check connection, in case connect
1688         if(!Connect(3)) return kFALSE;
1689
1690         TString sqlQuery;
1691         sqlQuery = Form("select * from %s %s order by run", fConfig->GetShuttlelbTable(), whereClause);
1692
1693         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
1694         if (!aResult) {
1695                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
1696                 return kFALSE;
1697         }
1698
1699         AliDebug(2,Form("Query = %s", sqlQuery.Data()));
1700
1701         if(aResult->GetRowCount() == 0) {
1702                 AliInfo("No entries in Shuttle Logbook match request");
1703                 delete aResult;
1704                 return kTRUE;
1705         }
1706
1707         // TODO Check field count!
1708         const UInt_t nCols = 23;
1709         if (aResult->GetFieldCount() != (Int_t) nCols) {
1710                 AliError("Invalid SQL result field number!");
1711                 delete aResult;
1712                 return kFALSE;
1713         }
1714
1715         TSQLRow* aRow;
1716         while ((aRow = aResult->Next())) {
1717                 TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
1718                 Int_t run = runString.Atoi();
1719
1720                 AliShuttleLogbookEntry *entry = QueryRunParameters(run);
1721                 if (!entry)
1722                         continue;
1723
1724                 // loop on detectors
1725                 for(UInt_t ii = 0; ii < nCols; ii++)
1726                         entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii));
1727
1728                 entries.AddLast(entry);
1729                 delete aRow;
1730         }
1731
1732         delete aResult;
1733         return kTRUE;
1734 }
1735
1736 //______________________________________________________________________________________________
1737 AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
1738 {
1739         //
1740         // Retrieve run parameters written in the DAQ logbook and sets them into AliShuttleLogbookEntry object
1741         //
1742
1743         // check connection, in case connect
1744         if (!Connect(3))
1745                 return 0;
1746
1747         TString sqlQuery;
1748         sqlQuery.Form("select * from %s where run=%d", fConfig->GetDAQlbTable(), run);
1749
1750         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
1751         if (!aResult) {
1752                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
1753                 return 0;
1754         }
1755
1756         if (aResult->GetRowCount() == 0) {
1757                 Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run));
1758                 delete aResult;
1759                 return 0;
1760         }
1761
1762         if (aResult->GetRowCount() > 1) {
1763                 AliError(Form("More than one entry in DAQ Logbook for run %d. Skipping", run));
1764                 delete aResult;
1765                 return 0;
1766         }
1767
1768         TSQLRow* aRow = aResult->Next();
1769         if (!aRow)
1770         {
1771                 AliError(Form("Could not retrieve row for run %d. Skipping", run));
1772                 delete aResult;
1773                 return 0;
1774         }
1775
1776         AliShuttleLogbookEntry* entry = new AliShuttleLogbookEntry(run);
1777
1778         for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
1779                 entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
1780
1781         UInt_t startTime = entry->GetStartTime();
1782         UInt_t endTime = entry->GetEndTime();
1783
1784         if (!startTime || !endTime || startTime > endTime) {
1785                 Log("SHUTTLE",
1786                         Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d",
1787                                 run, startTime, endTime));
1788                 delete entry;
1789                 delete aRow;
1790                 delete aResult;
1791                 return 0;
1792         }
1793
1794         delete aRow;
1795         delete aResult;
1796
1797         return entry;
1798 }
1799
1800 //______________________________________________________________________________________________
1801 TMap* AliShuttle::GetValueSet(const char* host, Int_t port, const TSeqCollection* entries,
1802                               DCSType type, Int_t multiSplit)
1803 {
1804         // Retrieve all "entry" data points from the DCS server
1805         // host, port: TSocket connection parameters
1806         // entries: list of name of the alias or data point
1807         // type: kAlias or kDP
1808         // returns TMap of values, 0 when failure
1809         
1810         AliDCSClient client(host, port, fTimeout, fRetries, multiSplit);
1811
1812         TMap* result = 0;
1813         if (type == kAlias)
1814         {
1815                 result = client.GetAliasValues(entries, GetCurrentStartTime(), 
1816                         GetCurrentEndTime());
1817         } 
1818         else if (type == kDP)
1819         {
1820                 result = client.GetDPValues(entries, GetCurrentStartTime(), 
1821                         GetCurrentEndTime());
1822         }
1823
1824         if (result == 0)
1825         {
1826                 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get entries! Reason: %s",
1827                         client.GetServerError().Data()));
1828
1829                 return 0;
1830         }
1831                 
1832         return result;
1833 }
1834
1835 //______________________________________________________________________________________________
1836 const char* AliShuttle::GetFile(Int_t system, const char* detector,
1837                 const char* id, const char* source)
1838 {
1839         // Get calibration file from file exchange servers
1840         // First queris the FXS database for the file name, using the run, detector, id and source info
1841         // then calls RetrieveFile(filename) for actual copy to local disk
1842         // run: current run being processed (given by Logbook entry fLogbookEntry)
1843         // detector: the Preprocessor name
1844         // id: provided as a parameter by the Preprocessor
1845         // source: provided by the Preprocessor through GetFileSources function
1846
1847         // check if test mode should simulate a FXS error
1848         if (fTestMode & kErrorFXSFiles)
1849         {
1850                 Log(detector, Form("GetFile - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
1851                 return 0;
1852         }
1853         
1854         // check connection, in case connect
1855         if (!Connect(system))
1856         {
1857                 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
1858                 return 0;
1859         }
1860
1861         // Query preparation
1862         TString sourceName(source);
1863         Int_t nFields = 3;
1864         TString sqlQueryStart = Form("select filePath,size,fileChecksum from %s where",
1865                                                                 fConfig->GetFXSdbTable(system));
1866         TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1867                                                                 GetCurrentRun(), detector, id);
1868
1869         if (system == kDAQ)
1870         {
1871                 whereClause += Form(" and DAQsource=\"%s\"", source);
1872         }
1873         else if (system == kDCS)
1874         {
1875                 sourceName="none";
1876         }
1877         else if (system == kHLT)
1878         {
1879                 whereClause += Form(" and DDLnumbers=\"%s\"", source);
1880                 nFields = 3;
1881         }
1882
1883         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1884
1885         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1886
1887         // Query execution
1888         TSQLResult* aResult = 0;
1889         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
1890         if (!aResult) {
1891                 Log(detector, Form("GetFileName - Can't execute SQL query to %s database for: id = %s, source = %s",
1892                                 GetSystemName(system), id, sourceName.Data()));
1893                 return 0;
1894         }
1895
1896         if(aResult->GetRowCount() == 0)
1897         {
1898                 Log(detector,
1899                         Form("GetFileName - No entry in %s FXS db for: id = %s, source = %s",
1900                                 GetSystemName(system), id, sourceName.Data()));
1901                 delete aResult;
1902                 return 0;
1903         }
1904
1905         if (aResult->GetRowCount() > 1) {
1906                 Log(detector,
1907                         Form("GetFileName - More than one entry in %s FXS db for: id = %s, source = %s",
1908                                 GetSystemName(system), id, sourceName.Data()));
1909                 delete aResult;
1910                 return 0;
1911         }
1912
1913         if (aResult->GetFieldCount() != nFields) {
1914                 Log(detector,
1915                         Form("GetFileName - Wrong field count in %s FXS db for: id = %s, source = %s",
1916                                 GetSystemName(system), id, sourceName.Data()));
1917                 delete aResult;
1918                 return 0;
1919         }
1920
1921         TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
1922
1923         if (!aRow){
1924                 Log(detector, Form("GetFileName - Empty set result in %s FXS db from query: id = %s, source = %s",
1925                                 GetSystemName(system), id, sourceName.Data()));
1926                 delete aResult;
1927                 return 0;
1928         }
1929
1930         TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
1931         TString fileSize(aRow->GetField(1), aRow->GetFieldLength(1));
1932         TString fileChecksum(aRow->GetField(2), aRow->GetFieldLength(2));
1933
1934         delete aResult;
1935         delete aRow;
1936
1937         AliDebug(2, Form("filePath = %s; size = %s, fileChecksum = %s",
1938                                 filePath.Data(), fileSize.Data(), fileChecksum.Data()));
1939
1940         // retrieved file is renamed to make it unique
1941         TString localFileName = Form("%s_%s_%d_%s_%s.shuttle",
1942                                         GetSystemName(system), detector, GetCurrentRun(), id, sourceName.Data());
1943
1944
1945         // file retrieval from FXS
1946         UInt_t nRetries = 0;
1947         UInt_t maxRetries = 3;
1948         Bool_t result = kFALSE;
1949
1950         // copy!! if successful TSystem::Exec returns 0
1951         while(nRetries++ < maxRetries) {
1952                 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
1953                 result = RetrieveFile(system, filePath.Data(), localFileName.Data());
1954                 if(!result)
1955                 {
1956                         Log(detector, Form("GetFileName - Copy of file %s from %s FXS failed",
1957                                         filePath.Data(), GetSystemName(system)));
1958                         continue;
1959                 } 
1960
1961                 if (fileChecksum.Length()>0)
1962                 {
1963                         // compare md5sum of local file with the one stored in the FXS DB
1964                         Int_t md5Comp = gSystem->Exec(Form("md5sum %s/%s |grep %s 2>&1 > /dev/null",
1965                                                 GetShuttleTempDir(), localFileName.Data(), fileChecksum.Data()));
1966
1967                         if (md5Comp != 0)
1968                         {
1969                                 Log(detector, Form("GetFileName - md5sum of file %s does not match with local copy!",
1970                                                         filePath.Data()));
1971                                 result = kFALSE;
1972                                 continue;
1973                         }
1974                 } else {
1975                         Log(fCurrentDetector, Form("GetFile - md5sum of file %s not set in %s database, skipping comparison",
1976                                                         filePath.Data(), GetSystemName(system)));
1977                 }
1978                 if (result) break;
1979         }
1980
1981         if(!result) return 0;
1982
1983         fFXSCalled[system]=kTRUE;
1984         TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, sourceName.Data()));
1985         fFXSlist[system].Add(fileParams);
1986
1987         static TString fullLocalFileName;
1988         fullLocalFileName.Form("%s/%s", GetShuttleTempDir(), localFileName.Data());
1989
1990         Log(fCurrentDetector, Form("GetFile - Retrieved file with id %s and source %s from %s to %s", id, source, GetSystemName(system), fullLocalFileName.Data()));
1991
1992         return fullLocalFileName.Data();
1993 }
1994
1995 //______________________________________________________________________________________________
1996 Bool_t AliShuttle::RetrieveFile(UInt_t system, const char* fxsFileName, const char* localFileName)
1997 {
1998         //
1999         // Copies file from FXS to local Shuttle machine
2000         //
2001
2002         // check temp directory: trying to cd to temp; if it does not exist, create it
2003         AliDebug(2, Form("Copy file %s from %s FXS into %s/%s",
2004                         GetSystemName(system), fxsFileName, GetShuttleTempDir(), localFileName));
2005
2006         void* dir = gSystem->OpenDirectory(GetShuttleTempDir());
2007         if (dir == NULL) {
2008                 if (gSystem->mkdir(GetShuttleTempDir(), kTRUE)) {
2009                         AliError(Form("Can't open directory <%s>", GetShuttleTempDir()));
2010                         return kFALSE;
2011                 }
2012
2013         } else {
2014                 gSystem->FreeDirectory(dir);
2015         }
2016
2017         TString baseFXSFolder;
2018         if (system == kDAQ)
2019         {
2020                 baseFXSFolder = "FES/";
2021         }
2022         else if (system == kDCS)
2023         {
2024                 baseFXSFolder = "";
2025         }
2026         else if (system == kHLT)
2027         {
2028                 baseFXSFolder = "/opt/FXS/";
2029         }
2030
2031
2032         TString command = Form("scp -oPort=%d -2 %s@%s:%s%s %s/%s",
2033                 fConfig->GetFXSPort(system),
2034                 fConfig->GetFXSUser(system),
2035                 fConfig->GetFXSHost(system),
2036                 baseFXSFolder.Data(),
2037                 fxsFileName,
2038                 GetShuttleTempDir(),
2039                 localFileName);
2040
2041         AliDebug(2, Form("%s",command.Data()));
2042
2043         Bool_t result = (gSystem->Exec(command.Data()) == 0);
2044
2045         return result;
2046 }
2047
2048 //______________________________________________________________________________________________
2049 TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
2050 {
2051         //
2052         // Get sources producing the condition file Id from file exchange servers
2053         // if id is NULL all sources are returned (distinct)
2054         //
2055
2056         Log(detector, Form("GetFileSources - Retrieving sources with id %s from %s", id, GetSystemName(system)));
2057         
2058         // check if test mode should simulate a FXS error
2059         if (fTestMode & kErrorFXSSources)
2060         {
2061                 Log(detector, Form("GetFileSources - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2062                 return 0;
2063         }
2064
2065         if (system == kDCS)
2066         {
2067                 AliWarning("DCS system has only one source of data!");
2068                 TList *list = new TList();
2069                 list->SetOwner(1);
2070                 list->Add(new TObjString(" "));
2071                 return list;
2072         }
2073
2074         // check connection, in case connect
2075         if (!Connect(system))
2076         {
2077                 Log(detector, Form("GetFileSources - Couldn't connect to %s FXS database", GetSystemName(system)));
2078                 return NULL;
2079         }
2080
2081         TString sourceName = 0;
2082         if (system == kDAQ)
2083         {
2084                 sourceName = "DAQsource";
2085         } else if (system == kHLT)
2086         {
2087                 sourceName = "DDLnumbers";
2088         }
2089
2090         TString sqlQueryStart = Form("select distinct %s from %s where", sourceName.Data(), fConfig->GetFXSdbTable(system));
2091         TString whereClause = Form("run=%d and detector=\"%s\"",
2092                                 GetCurrentRun(), detector);
2093         if (id)
2094                 whereClause += Form(" and fileId=\"%s\"", id);
2095         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2096
2097         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2098
2099         // Query execution
2100         TSQLResult* aResult;
2101         aResult = fServer[system]->Query(sqlQuery);
2102         if (!aResult) {
2103                 Log(detector, Form("GetFileSources - Can't execute SQL query to %s database for id: %s",
2104                                 GetSystemName(system), id));
2105                 return 0;
2106         }
2107
2108         TList *list = new TList();
2109         list->SetOwner(1);
2110         
2111         if (aResult->GetRowCount() == 0)
2112         {
2113                 Log(detector,
2114                         Form("GetFileSources - No entry in %s FXS table for id: %s", GetSystemName(system), id));
2115                 delete aResult;
2116                 return list;
2117         }
2118
2119         Log(detector, Form("GetFileSources - Found %d sources", aResult->GetRowCount()));
2120
2121         TSQLRow* aRow;
2122         while ((aRow = aResult->Next()))
2123         {
2124
2125                 TString source(aRow->GetField(0), aRow->GetFieldLength(0));
2126                 AliDebug(2, Form("%s = %s", sourceName.Data(), source.Data()));
2127                 list->Add(new TObjString(source));
2128                 delete aRow;
2129         }
2130
2131         delete aResult;
2132
2133         return list;
2134 }
2135
2136 //______________________________________________________________________________________________
2137 TList* AliShuttle::GetFileIDs(Int_t system, const char* detector, const char* source)
2138 {
2139         //
2140         // Get all ids of condition files produced by a given source from file exchange servers
2141         //
2142         
2143         Log(detector, Form("GetFileIDs - Retrieving ids with source %s with %s", source, GetSystemName(system)));
2144
2145         // check if test mode should simulate a FXS error
2146         if (fTestMode & kErrorFXSSources)
2147         {
2148                 Log(detector, Form("GetFileIDs - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2149                 return 0;
2150         }
2151
2152         // check connection, in case connect
2153         if (!Connect(system))
2154         {
2155                 Log(detector, Form("GetFileIDs - Couldn't connect to %s FXS database", GetSystemName(system)));
2156                 return NULL;
2157         }
2158
2159         TString sourceName = 0;
2160         if (system == kDAQ)
2161         {
2162                 sourceName = "DAQsource";
2163         } else if (system == kHLT)
2164         {
2165                 sourceName = "DDLnumbers";
2166         }
2167
2168         TString sqlQueryStart = Form("select fileId from %s where", fConfig->GetFXSdbTable(system));
2169         TString whereClause = Form("run=%d and detector=\"%s\"",
2170                                 GetCurrentRun(), detector);
2171         if (sourceName.Length() > 0 && source)
2172                 whereClause += Form(" and %s=\"%s\"", sourceName.Data(), source);
2173         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2174
2175         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2176
2177         // Query execution
2178         TSQLResult* aResult;
2179         aResult = fServer[system]->Query(sqlQuery);
2180         if (!aResult) {
2181                 Log(detector, Form("GetFileIDs - Can't execute SQL query to %s database for source: %s",
2182                                 GetSystemName(system), source));
2183                 return 0;
2184         }
2185
2186         TList *list = new TList();
2187         list->SetOwner(1);
2188         
2189         if (aResult->GetRowCount() == 0)
2190         {
2191                 Log(detector,
2192                         Form("GetFileIDs - No entry in %s FXS table for source: %s", GetSystemName(system), source));
2193                 delete aResult;
2194                 return list;
2195         }
2196
2197         Log(detector, Form("GetFileIDs - Found %d ids", aResult->GetRowCount()));
2198
2199         TSQLRow* aRow;
2200
2201         while ((aRow = aResult->Next()))
2202         {
2203
2204                 TString id(aRow->GetField(0), aRow->GetFieldLength(0));
2205                 AliDebug(2, Form("fileId = %s", id.Data()));
2206                 list->Add(new TObjString(id));
2207                 delete aRow;
2208         }
2209
2210         delete aResult;
2211
2212         return list;
2213 }
2214
2215 //______________________________________________________________________________________________
2216 Bool_t AliShuttle::Connect(Int_t system)
2217 {
2218         // Connect to MySQL Server of the system's FXS MySQL databases
2219         // DAQ Logbook, Shuttle Logbook and DAQ FXS db are on the same host
2220         //
2221
2222         // check connection: if already connected return
2223         if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
2224
2225         TString dbHost, dbUser, dbPass, dbName;
2226
2227         if (system < 3) // FXS db servers
2228         {
2229                 dbHost = Form("mysql://%s:%d", fConfig->GetFXSdbHost(system), fConfig->GetFXSdbPort(system));
2230                 dbUser = fConfig->GetFXSdbUser(system);
2231                 dbPass = fConfig->GetFXSdbPass(system);
2232                 dbName =   fConfig->GetFXSdbName(system);
2233         } else { // Run & Shuttle logbook servers
2234         // TODO Will the Shuttle logbook server be the same as the Run logbook server ???
2235                 dbHost = Form("mysql://%s:%d", fConfig->GetDAQlbHost(), fConfig->GetDAQlbPort());
2236                 dbUser = fConfig->GetDAQlbUser();
2237                 dbPass = fConfig->GetDAQlbPass();
2238                 dbName =   fConfig->GetDAQlbDB();
2239         }
2240
2241         fServer[system] = TSQLServer::Connect(dbHost.Data(), dbUser.Data(), dbPass.Data());
2242         if (!fServer[system] || !fServer[system]->IsConnected()) {
2243                 if(system < 3)
2244                 {
2245                 AliError(Form("Can't establish connection to FXS database for %s",
2246                                         AliShuttleInterface::GetSystemName(system)));
2247                 } else {
2248                 AliError("Can't establish connection to Run logbook.");
2249                 }
2250                 if(fServer[system]) delete fServer[system];
2251                 return kFALSE;
2252         }
2253
2254         // Get tables
2255         TSQLResult* aResult=0;
2256         switch(system){
2257                 case kDAQ:
2258                         aResult = fServer[kDAQ]->GetTables(dbName.Data());
2259                         break;
2260                 case kDCS:
2261                         aResult = fServer[kDCS]->GetTables(dbName.Data());
2262                         break;
2263                 case kHLT:
2264                         aResult = fServer[kHLT]->GetTables(dbName.Data());
2265                         break;
2266                 default:
2267                         aResult = fServer[3]->GetTables(dbName.Data());
2268                         break;
2269         }
2270
2271         delete aResult;
2272         return kTRUE;
2273 }
2274
2275 //______________________________________________________________________________________________
2276 Bool_t AliShuttle::UpdateTable()
2277 {
2278         //
2279         // Update FXS table filling time_processed field in all rows corresponding to current run and detector
2280         //
2281
2282         Bool_t result = kTRUE;
2283
2284         for (UInt_t system=0; system<3; system++)
2285         {
2286                 if(!fFXSCalled[system]) continue;
2287
2288                 // check connection, in case connect
2289                 if (!Connect(system))
2290                 {
2291                         Log(fCurrentDetector, Form("UpdateTable - Couldn't connect to %s FXS database", GetSystemName(system)));
2292                         result = kFALSE;
2293                         continue;
2294                 }
2295
2296                 TTimeStamp now; // now
2297
2298                 // Loop on FXS list entries
2299                 TIter iter(&fFXSlist[system]);
2300                 TObjString *aFXSentry=0;
2301                 while ((aFXSentry = dynamic_cast<TObjString*> (iter.Next())))
2302                 {
2303                         TString aFXSentrystr = aFXSentry->String();
2304                         TObjArray *aFXSarray = aFXSentrystr.Tokenize("#!?!#");
2305                         if (!aFXSarray || aFXSarray->GetEntries() != 2 )
2306                         {
2307                                 Log(fCurrentDetector, Form("UpdateTable - error updating %s FXS entry. Check string: <%s>",
2308                                         GetSystemName(system), aFXSentrystr.Data()));
2309                                 if(aFXSarray) delete aFXSarray;
2310                                 result = kFALSE;
2311                                 continue;
2312                         }
2313                         const char* fileId = ((TObjString*) aFXSarray->At(0))->GetName();
2314                         const char* source = ((TObjString*) aFXSarray->At(1))->GetName();
2315
2316                         TString whereClause;
2317                         if (system == kDAQ)
2318                         {
2319                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
2320                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
2321                         }
2322                         else if (system == kDCS)
2323                         {
2324                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\";",
2325                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId);
2326                         }
2327                         else if (system == kHLT)
2328                         {
2329                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\";",
2330                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
2331                         }
2332
2333                         delete aFXSarray;
2334
2335                         TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
2336                                                                 now.GetSec(), whereClause.Data());
2337
2338                         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2339
2340                         // Query execution
2341                         TSQLResult* aResult;
2342                         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2343                         if (!aResult)
2344                         {
2345                                 Log(fCurrentDetector, Form("UpdateTable - %s db: can't execute SQL query <%s>",
2346                                                                 GetSystemName(system), sqlQuery.Data()));
2347                                 result = kFALSE;
2348                                 continue;
2349                         }
2350                         delete aResult;
2351                 }
2352         }
2353
2354         return result;
2355 }
2356
2357 //______________________________________________________________________________________________
2358 Bool_t AliShuttle::UpdateTableFailCase()
2359 {
2360         // Update FXS table filling time_processed field in all rows corresponding to current run and detector
2361         // this is called in case the preprocessor is declared failed for the current run, because
2362         // the fields are updated only in case of success
2363
2364         Bool_t result = kTRUE;
2365
2366         for (UInt_t system=0; system<3; system++)
2367         {
2368                 // check connection, in case connect
2369                 if (!Connect(system))
2370                 {
2371                         Log(fCurrentDetector, Form("UpdateTableFailCase - Couldn't connect to %s FXS database",
2372                                                         GetSystemName(system)));
2373                         result = kFALSE;
2374                         continue;
2375                 }
2376
2377                 TTimeStamp now; // now
2378
2379                 // Loop on FXS list entries
2380
2381                 TString whereClause = Form("where run=%d and detector=\"%s\";",
2382                                                 GetCurrentRun(), fCurrentDetector.Data());
2383
2384
2385                 TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
2386                                                         now.GetSec(), whereClause.Data());
2387
2388                 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2389
2390                 // Query execution
2391                 TSQLResult* aResult;
2392                 aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2393                 if (!aResult)
2394                 {
2395                         Log(fCurrentDetector, Form("UpdateTableFailCase - %s db: can't execute SQL query <%s>",
2396                                                         GetSystemName(system), sqlQuery.Data()));
2397                         result = kFALSE;
2398                         continue;
2399                 }
2400                 delete aResult;
2401         }
2402
2403         return result;
2404 }
2405
2406 //______________________________________________________________________________________________
2407 Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
2408 {
2409         //
2410         // Update Shuttle logbook filling detector or shuttle_done column
2411         // ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
2412         //
2413
2414         // check connection, in case connect
2415         if(!Connect(3)){
2416                 Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook.");
2417                 return kFALSE;
2418         }
2419
2420         TString detName(detector);
2421         TString setClause;
2422         if(detName == "shuttle_done")
2423         {
2424                 setClause = "set shuttle_done=1";
2425
2426                 // Send the information to ML
2427                 TMonaLisaText  mlStatus("SHUTTLE_status", "Done");
2428
2429                 TList mlList;
2430                 mlList.Add(&mlStatus);
2431
2432                 fMonaLisa->SendParameters(&mlList);
2433         } else {
2434                 TString statusStr(status);
2435                 if(statusStr.Contains("done", TString::kIgnoreCase) ||
2436                    statusStr.Contains("failed", TString::kIgnoreCase)){
2437                         setClause = Form("set %s=\"%s\"", detector, status);
2438                 } else {
2439                         Log("SHUTTLE",
2440                                 Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s",
2441                                         status, detector));
2442                         return kFALSE;
2443                 }
2444         }
2445
2446         TString whereClause = Form("where run=%d", GetCurrentRun());
2447
2448         TString sqlQuery = Form("update %s %s %s",
2449                                         fConfig->GetShuttlelbTable(), setClause.Data(), whereClause.Data());
2450
2451         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2452
2453         // Query execution
2454         TSQLResult* aResult;
2455         aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
2456         if (!aResult) {
2457                 Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data()));
2458                 return kFALSE;
2459         }
2460         delete aResult;
2461
2462         return kTRUE;
2463 }
2464
2465 //______________________________________________________________________________________________
2466 Int_t AliShuttle::GetCurrentRun() const
2467 {
2468         //
2469         // Get current run from logbook entry
2470         //
2471
2472         return fLogbookEntry ? fLogbookEntry->GetRun() : -1;
2473 }
2474
2475 //______________________________________________________________________________________________
2476 UInt_t AliShuttle::GetCurrentStartTime() const
2477 {
2478         //
2479         // get current start time
2480         //
2481
2482         return fLogbookEntry ? fLogbookEntry->GetStartTime() : 0;
2483 }
2484
2485 //______________________________________________________________________________________________
2486 UInt_t AliShuttle::GetCurrentEndTime() const
2487 {
2488         //
2489         // get current end time from logbook entry
2490         //
2491
2492         return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
2493 }
2494
2495 //______________________________________________________________________________________________
2496 void AliShuttle::Log(const char* detector, const char* message)
2497 {
2498         //
2499         // Fill log string with a message
2500         //
2501
2502         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
2503         if (dir == NULL) {
2504                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE)) {
2505                         AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
2506                         return;
2507                 }
2508
2509         } else {
2510                 gSystem->FreeDirectory(dir);
2511         }
2512
2513         TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
2514         if (GetCurrentRun() >= 0) 
2515                 toLog += Form("run %d - ", GetCurrentRun());
2516         toLog += Form("%s", message);
2517
2518         AliInfo(toLog.Data());
2519         
2520         // if we redirect the log output already to the file, leave here
2521         if (fOutputRedirected && strcmp(detector, "SHUTTLE") != 0)
2522                 return;
2523
2524         TString fileName = GetLogFileName(detector);
2525         
2526         gSystem->ExpandPathName(fileName);
2527
2528         ofstream logFile;
2529         logFile.open(fileName, ofstream::out | ofstream::app);
2530
2531         if (!logFile.is_open()) {
2532                 AliError(Form("Could not open file %s", fileName.Data()));
2533                 return;
2534         }
2535
2536         logFile << toLog.Data() << "\n";
2537
2538         logFile.close();
2539 }
2540
2541 //______________________________________________________________________________________________
2542 TString AliShuttle::GetLogFileName(const char* detector) const
2543 {
2544         // 
2545         // returns the name of the log file for a given sub detector
2546         //
2547         
2548         TString fileName;
2549         
2550         if (GetCurrentRun() >= 0) 
2551                 fileName.Form("%s/%s_%d.log", GetShuttleLogDir(), detector, GetCurrentRun());
2552         else
2553                 fileName.Form("%s/%s.log", GetShuttleLogDir(), detector);
2554
2555         return fileName;
2556 }
2557
2558 //______________________________________________________________________________________________
2559 Bool_t AliShuttle::Collect(Int_t run)
2560 {
2561         //
2562         // Collects conditions data for all UNPROCESSED run written to DAQ LogBook in case of run = -1 (default)
2563         // If a dedicated run is given this run is processed
2564         //
2565         // In operational mode, this is the Shuttle function triggered by the EOR signal.
2566         //
2567
2568         if (run == -1)
2569                 Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs");
2570         else
2571                 Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run));
2572
2573         SetLastAction("Starting");
2574
2575         TString whereClause("where shuttle_done=0");
2576         if (run != -1)
2577                 whereClause += Form(" and run=%d", run);
2578
2579         TObjArray shuttleLogbookEntries;
2580         if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries))
2581         {
2582                 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
2583                 return kFALSE;
2584         }
2585
2586         if (shuttleLogbookEntries.GetEntries() == 0)
2587         {
2588                 if (run == -1)
2589                         Log("SHUTTLE","Collect - Found no UNPROCESSED runs in Shuttle logbook");
2590                 else
2591                         Log("SHUTTLE", Form("Collect - Run %d is already DONE "
2592                                                 "or it does not exist in Shuttle logbook", run));
2593                 return kTRUE;
2594         }
2595
2596         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
2597                 fFirstUnprocessed[iDet] = kTRUE;
2598
2599         if (run != -1)
2600         {
2601                 // query Shuttle logbook for earlier runs, check if some detectors are unprocessed,
2602                 // flag them into fFirstUnprocessed array
2603                 TString whereClause(Form("where shuttle_done=0 and run < %d", run));
2604                 TObjArray tmpLogbookEntries;
2605                 if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries))
2606                 {
2607                         Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
2608                         return kFALSE;
2609                 }
2610
2611                 TIter iter(&tmpLogbookEntries);
2612                 AliShuttleLogbookEntry* anEntry = 0;
2613                 while ((anEntry = dynamic_cast<AliShuttleLogbookEntry*> (iter.Next())))
2614                 {
2615                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
2616                         {
2617                                 if (anEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
2618                                 {
2619                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
2620                                                         anEntry->GetRun(), GetDetName(iDet)));
2621                                         fFirstUnprocessed[iDet] = kFALSE;
2622                                 }
2623                         }
2624
2625                 }
2626
2627         }
2628
2629         if (!RetrieveConditionsData(shuttleLogbookEntries))
2630         {
2631                 Log("SHUTTLE", "Collect - Process of at least one run failed");
2632                 return kFALSE;
2633         }
2634
2635         Log("SHUTTLE", "Collect - Requested run(s) successfully processed");
2636         return kTRUE;
2637 }
2638
2639 //______________________________________________________________________________________________
2640 Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries)
2641 {
2642         //
2643         // Retrieve conditions data for all runs that aren't processed yet
2644         //
2645
2646         Bool_t hasError = kFALSE;
2647
2648         TIter iter(&dateEntries);
2649         AliShuttleLogbookEntry* anEntry;
2650
2651         while ((anEntry = (AliShuttleLogbookEntry*) iter.Next())){
2652                 if (!Process(anEntry)){
2653                         hasError = kTRUE;
2654                 }
2655
2656                 // clean SHUTTLE temp directory
2657                 TString filename = Form("%s/*.shuttle", GetShuttleTempDir());
2658                 RemoveFile(filename.Data());
2659         }
2660
2661         return hasError == kFALSE;
2662 }
2663
2664 //______________________________________________________________________________________________
2665 ULong_t AliShuttle::GetTimeOfLastAction() const
2666 {
2667         //
2668         // Gets time of last action
2669         //
2670
2671         ULong_t tmp;
2672
2673         fMonitoringMutex->Lock();
2674
2675         tmp = fLastActionTime;
2676
2677         fMonitoringMutex->UnLock();
2678
2679         return tmp;
2680 }
2681
2682 //______________________________________________________________________________________________
2683 const TString AliShuttle::GetLastAction() const
2684 {
2685         //
2686         // returns a string description of the last action
2687         //
2688
2689         TString tmp;
2690
2691         fMonitoringMutex->Lock();
2692         
2693         tmp = fLastAction;
2694         
2695         fMonitoringMutex->UnLock();
2696
2697         return tmp;
2698 }
2699
2700 //______________________________________________________________________________________________
2701 void AliShuttle::SetLastAction(const char* action)
2702 {
2703         //
2704         // updates the monitoring variables
2705         //
2706
2707         fMonitoringMutex->Lock();
2708
2709         fLastAction = action;
2710         fLastActionTime = time(0);
2711         
2712         fMonitoringMutex->UnLock();
2713 }
2714
2715 //______________________________________________________________________________________________
2716 const char* AliShuttle::GetRunParameter(const char* param)
2717 {
2718         //
2719         // returns run parameter read from DAQ logbook
2720         //
2721
2722         if(!fLogbookEntry) {
2723                 AliError("No logbook entry!");
2724                 return 0;
2725         }
2726
2727         return fLogbookEntry->GetRunParameter(param);
2728 }
2729
2730 //______________________________________________________________________________________________
2731 AliCDBEntry* AliShuttle::GetFromOCDB(const char* detector, const AliCDBPath& path)
2732 {
2733         //
2734         // returns object from OCDB valid for current run
2735         //
2736
2737         if (fTestMode & kErrorOCDB)
2738         {
2739                 Log(detector, "GetFromOCDB - In TESTMODE - Simulating error with OCDB");
2740                 return 0;
2741         }
2742         
2743         AliCDBStorage *sto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
2744         if (!sto)
2745         {
2746                 Log(detector, "GetFromOCDB - Cannot activate main OCDB for query!");
2747                 return 0;
2748         }
2749
2750         return dynamic_cast<AliCDBEntry*> (sto->Get(path, GetCurrentRun()));
2751 }
2752
2753 //______________________________________________________________________________________________
2754 Bool_t AliShuttle::SendMail()
2755 {
2756         //
2757         // sends a mail to the subdetector expert in case of preprocessor error
2758         //
2759         
2760         if (fTestMode != kNone)
2761                 return kTRUE;
2762
2763         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
2764         if (dir == NULL)
2765         {
2766                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
2767                 {
2768                         AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
2769                         return kFALSE;
2770                 }
2771
2772         } else {
2773                 gSystem->FreeDirectory(dir);
2774         }
2775
2776         TString bodyFileName;
2777         bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
2778         gSystem->ExpandPathName(bodyFileName);
2779
2780         ofstream mailBody;
2781         mailBody.open(bodyFileName, ofstream::out);
2782
2783         if (!mailBody.is_open())
2784         {
2785                 AliError(Form("Could not open mail body file %s", bodyFileName.Data()));
2786                 return kFALSE;
2787         }
2788
2789         TString to="";
2790         TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
2791         TObjString *anExpert=0;
2792         while ((anExpert = (TObjString*) iterExperts.Next()))
2793         {
2794                 to += Form("%s,", anExpert->GetName());
2795         }
2796         to.Remove(to.Length()-1);
2797         AliDebug(2, Form("to: %s",to.Data()));
2798
2799         if (to.IsNull()) {
2800                 AliInfo("List of detector responsibles not yet set!");
2801                 return kFALSE;
2802         }
2803
2804         TString cc="alberto.colla@cern.ch";
2805
2806         TString subject = Form("%s Shuttle preprocessor FAILED in run %d !",
2807                                 fCurrentDetector.Data(), GetCurrentRun());
2808         AliDebug(2, Form("subject: %s", subject.Data()));
2809
2810         TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
2811         body += Form("SHUTTLE just detected that your preprocessor "
2812                         "failed processing run %d!!\n\n", GetCurrentRun());
2813         body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n", fCurrentDetector.Data());
2814         body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
2815         body += Form("Find the %s log for the current run on \n\n"
2816                 "\thttp://pcalishuttle01.cern.ch:8880/logs/%s_%d.log \n\n", 
2817                 fCurrentDetector.Data(), fCurrentDetector.Data(), GetCurrentRun());
2818         body += Form("The last 10 lines of %s log file are following:\n\n");
2819
2820         AliDebug(2, Form("Body begin: %s", body.Data()));
2821
2822         mailBody << body.Data();
2823         mailBody.close();
2824         mailBody.open(bodyFileName, ofstream::out | ofstream::app);
2825
2826         TString logFileName = Form("%s/%s_%d.log", GetShuttleLogDir(), fCurrentDetector.Data(), GetCurrentRun());
2827         TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
2828         if (gSystem->Exec(tailCommand.Data()))
2829         {
2830                 mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
2831         }
2832
2833         TString endBody = Form("------------------------------------------------------\n\n");
2834         endBody += Form("In case of problems please contact the SHUTTLE core team.\n\n");
2835         endBody += "Please do not answer this message directly, it is automatically generated.\n\n";
2836         endBody += "Greetings,\n\n \t\t\tthe SHUTTLE\n";
2837
2838         AliDebug(2, Form("Body end: %s", endBody.Data()));
2839
2840         mailBody << endBody.Data();
2841
2842         mailBody.close();
2843
2844         // send mail!
2845         TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
2846                                                 subject.Data(),
2847                                                 cc.Data(),
2848                                                 to.Data(),
2849                                                 bodyFileName.Data());
2850         AliDebug(2, Form("mail command: %s", mailCommand.Data()));
2851
2852         Bool_t result = gSystem->Exec(mailCommand.Data());
2853
2854         return result == 0;
2855 }
2856
2857 //______________________________________________________________________________________________
2858 const char* AliShuttle::GetRunType()
2859 {
2860         //
2861         // returns run type read from "run type" logbook
2862         //
2863
2864         if(!fLogbookEntry) {
2865                 AliError("No logbook entry!");
2866                 return 0;
2867         }
2868
2869         return fLogbookEntry->GetRunType();
2870 }
2871
2872 //______________________________________________________________________________________________
2873 Bool_t AliShuttle::GetHLTStatus()
2874 {
2875         // Return HLT status (ON=1 OFF=0)
2876         // Converts the HLT status from the status string read in the run logbook (not just a bool)
2877
2878         if(!fLogbookEntry) {
2879                 AliError("No logbook entry!");
2880                 return 0;
2881         }
2882
2883         // TODO implement when HLTStatus is inserted in run logbook
2884         //TString hltStatus = fLogbookEntry->GetRunParameter("HLTStatus");
2885         //if(hltStatus == "OFF") {return kFALSE};
2886
2887         return kTRUE;
2888 }
2889
2890 //______________________________________________________________________________________________
2891 void AliShuttle::SetShuttleTempDir(const char* tmpDir)
2892 {
2893         //
2894         // sets Shuttle temp directory
2895         //
2896
2897         fgkShuttleTempDir = gSystem->ExpandPathName(tmpDir);
2898 }
2899
2900 //______________________________________________________________________________________________
2901 void AliShuttle::SetShuttleLogDir(const char* logDir)
2902 {
2903         //
2904         // sets Shuttle log directory
2905         //
2906
2907         fgkShuttleLogDir = gSystem->ExpandPathName(logDir);
2908 }