]> git.uio.no Git - u/mrichter/AliRoot.git/blob - SHUTTLE/AliShuttle.cxx
Raw data tag merged files is written in /alice/data/.../lhcPeriod_DET/runNb/raw if...
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttle.cxx
1 /**************************************************************************
2  * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
3  *                                                                        *
4  * Author: The ALICE Off-line Project.                                    *
5  * Contributors are mentioned in the code where appropriate.              *
6  *                                                                        *
7  * Permission to use, copy, modify and distribute this software and its   *
8  * documentation strictly for non-commercial purposes is hereby granted   *
9  * without fee, provided that the above copyright notice appears in all   *
10  * copies and that both the copyright notice and this permission notice   *
11  * appear in the supporting documentation. The authors make no claims     *
12  * about the suitability of this software for any purpose. It is          *
13  * provided "as is" without express or implied warranty.                  *
14  **************************************************************************/
15
16 /*
17 $Log$
18 Revision 1.77  2007/12/19 11:16:16  acolla
19 More meaningful log message added in GetFileSources
20
21 Revision 1.76  2007/12/19 07:45:20  acolla
22 bug fix in the name of the raw tag files (Raw instead of raw)
23
24 Revision 1.75  2007/12/18 15:42:14  jgrosseo
25 adding number of open runs to monitoring
26
27 Revision 1.74  2007/12/17 03:23:32  jgrosseo
28 several bugfixes
29 added "empty preprocessor" as placeholder for Acorde in FDR
30
31 Revision 1.73  2007/12/14 19:31:36  acolla
32 Sending email to DCS experts is temporarily commented
33
34 Revision 1.72  2007/12/13 15:44:28  acolla
35 Run type added in mail sent to detector expert (eases understanding)
36
37 Revision 1.71  2007/12/12 14:56:14  jgrosseo
38 sending shuttle_ignore to ML also in case of 0 events
39
40 Revision 1.70  2007/12/12 13:45:35  acolla
41 Monalisa started in Collect() function. Alive message to monitor is sent at each Collect and every minute during preprocessor processing.
42
43 Revision 1.69  2007/12/12 10:06:29  acolla
44 in AliShuttle.cxx: SHUTTLE logbook is updated in case of invalid run times:
45
46 time_start==0 && time_end==0
47
48 logbook is NOT updated if time_start != 0 && time_end == 0, because it may mean that the run is still ongoing.
49
50 Revision 1.68  2007/12/11 10:15:17  acolla
51 Added marking SHUTTLE=DONE for invalid runs
52 (invalid start time or end time) and runs with totalEvents < 1
53
54 Revision 1.67  2007/12/07 19:14:36  acolla
55 in AliShuttleTrigger:
56
57 Added automatic collection of new runs on a regular time basis (settable from the configuration)
58
59 in AliShuttleConfig: new members
60
61 - triggerWait: time to wait for DIM trigger (s) before starting automatic collection of new runs
62 - mode: run mode (test, prod) -> used to build log folder (logs or logs_PROD)
63
64 in AliShuttle:
65
66 - logs now stored in logs/#RUN/DET_#RUN.log
67
68 Revision 1.66  2007/12/05 10:45:19  jgrosseo
69 changed order of arguments to TMonaLisaWriter
70
71 Revision 1.65  2007/11/26 16:58:37  acolla
72 Monalisa configuration added: host and table name
73
74 Revision 1.64  2007/11/13 16:15:47  acolla
75 DCS map is stored in a file in the temp folder where the detector is processed.
76 If the preprocessor fails, the temp folder is not removed. This will help the debugging of the problem.
77
78 Revision 1.63  2007/11/02 10:53:16  acolla
79 Protection added to AliShuttle::CopyFileLocally
80
81 Revision 1.62  2007/10/31 18:23:13  acolla
82 Furter developement on the Shuttle:
83
84 - Shuttle now connects to the Grid as alidaq. The OCDB and Reference folders
85 are now built from /alice/data, e.g.:
86 /alice/data/2007/LHC07a/OCDB
87
88 the year and LHC period are taken from the Shuttle.
89 Raw metadata files are stored by GRP to:
90 /alice/data/2007/LHC07a/<runNb>/Raw/RunMetadata.root
91
92 - Shuttle sends a mail to DCS experts each time DP retrieval fails.
93
94 Revision 1.61  2007/10/30 20:33:51  acolla
95 Improved managing of temporary folders, which weren't correctly handled.
96 Resolved bug introduced in StoreReferenceFile, which caused SPD preprocessor fail.
97
98 Revision 1.60  2007/10/29 18:06:16  acolla
99
100 New function StoreRunMetadataFile added to preprocessor and Shuttle interface
101 This function can be used by GRP only. It stores raw data tags merged file to the
102 raw data folder (e.g. /alice/data/2008/LHC08a/000099999/Raw).
103
104 KNOWN ISSUES:
105
106 1. Shuttle cannot write to /alice/data/ because it belongs to alidaq. Tag file is stored in /alice/simulation/... for the time being.
107 2. Due to a bug in TAlien::Mkdir, the creation of a folder in recursive mode (-p option) does not work. The problem
108 has been corrected in the root package on the Shuttle machine.
109
110 Revision 1.59  2007/10/05 12:40:55  acolla
111
112 Result error code added to AliDCSClient data members (it was "lost" with the new implementation of TMap* GetAliasValues and GetDPValues).
113
114 Revision 1.58  2007/09/28 15:27:40  acolla
115
116 AliDCSClient "multiSplit" option added in the DCS configuration
117 in AliDCSMessage: variable MAX_BODY_SIZE set to 500000
118
119 Revision 1.57  2007/09/27 16:53:13  acolla
120 Detectors can have more than one AMANDA server. SHUTTLE queries the servers sequentially,
121 merges the dcs aliases/DPs in one TMap and sends it to the preprocessor.
122
123 Revision 1.56  2007/09/14 16:46:14  jgrosseo
124 1) Connect and Close are called before and after each query, so one can
125 keep the same AliDCSClient object.
126 2) The splitting of a query is moved to GetDPValues/GetAliasValues.
127 3) Splitting interval can be specified in constructor
128
129 Revision 1.55  2007/08/06 12:26:40  acolla
130 Function Bool_t GetHLTStatus added to preprocessor. It returns the status of HLT
131 read from the run logbook.
132
133 Revision 1.54  2007/07/12 09:51:25  jgrosseo
134 removed duplicated log message in GetFile
135
136 Revision 1.53  2007/07/12 09:26:28  jgrosseo
137 updating hlt fxs base path
138
139 Revision 1.52  2007/07/12 08:06:45  jgrosseo
140 adding log messages in getfile... functions
141 adding not implemented copy constructor in alishuttleconfigholder
142
143 Revision 1.51  2007/07/03 17:24:52  acolla
144 root moved to v5-16-00. TFileMerger->Cp moved to TFile::Cp.
145
146 Revision 1.50  2007/07/02 17:19:32  acolla
147 preprocessor is run in a temp directory that is removed when process is finished.
148
149 Revision 1.49  2007/06/29 10:45:06  acolla
150 Number of columns in MySql Shuttle logbook increased by one (HLT added)
151
152 Revision 1.48  2007/06/21 13:06:19  acolla
153 GetFileSources returns dummy list with 1 source if system=DCS (better than
154 returning error as it was)
155
156 Revision 1.47  2007/06/19 17:28:56  acolla
157 HLT updated; missing map bug removed.
158
159 Revision 1.46  2007/06/09 13:01:09  jgrosseo
160 Switching to retrieval of several DCS DPs at a time (multiDPrequest)
161
162 Revision 1.45  2007/05/30 06:35:20  jgrosseo
163 Adding functionality to the Shuttle/TestShuttle:
164 o) Function to retrieve list of sources from a given system (GetFileSources with id=0)
165 o) Function to retrieve list of IDs for a given source      (GetFileIDs)
166 These functions are needed for dealing with the tag files that are saved for the GRP preprocessor
167 Example code has been added to the TestProcessor in TestShuttle
168
169 Revision 1.44  2007/05/11 16:09:32  acolla
170 Reference files for ITS, MUON and PHOS are now stored in OfflineDetName/OnlineDetName/run_...
171 example: ITS/SPD/100_filename.root
172
173 Revision 1.43  2007/05/10 09:59:51  acolla
174 Various bug fixes in StoreRefFilesToGrid; Cleaning of reference storage before processing detector (CleanReferenceStorage)
175
176 Revision 1.42  2007/05/03 08:01:39  jgrosseo
177 typo in last commit :-(
178
179 Revision 1.41  2007/05/03 08:00:48  jgrosseo
180 fixing log message when pp want to skip dcs value retrieval
181
182 Revision 1.40  2007/04/27 07:06:48  jgrosseo
183 GetFileSources returns empty list in case of no files, but successful query
184 No mails sent in testmode
185
186 Revision 1.39  2007/04/17 12:43:57  acolla
187 Correction in StoreOCDB; change of text in mail to detector expert
188
189 Revision 1.38  2007/04/12 08:26:18  jgrosseo
190 updated comment
191
192 Revision 1.37  2007/04/10 16:53:14  jgrosseo
193 redirecting sub detector stdout, stderr to sub detector log file
194
195 Revision 1.35  2007/04/04 16:26:38  acolla
196 1. Re-organization of function calls in TestPreprocessor to make it more meaningful.
197 2. Added missing dependency in test preprocessors.
198 3. in AliShuttle.cxx: processing time and memory consumption info on a single line.
199
200 Revision 1.34  2007/04/04 10:33:36  jgrosseo
201 1) Storing of files to the Grid is now done _after_ your preprocessors succeeded. This is transparent, which means that you can still use the same functions (Store, StoreReferenceData) to store files to the Grid. However, the Shuttle first stores them locally and transfers them after the preprocessor finished. The return code of these two functions has changed from UInt_t to Bool_t which gives you the success of the storing.
202 In case of an error with the Grid, the Shuttle will retry the storing later, the preprocessor does not need to be run again.
203
204 2) The meaning of the return code of the preprocessor has changed. 0 is now success and any other value means failure. This value is stored in the log and you can use it to keep details about the error condition.
205
206 3) New function StoreReferenceFile to _directly_ store a file (without opening it) to the reference storage.
207
208 4) The memory usage of the preprocessor is monitored. If it exceeds 2 GB it is terminated.
209
210 5) New function AliPreprocessor::ProcessDCS(). If you do not need to have DCS data in all cases, you can skip the processing by implemting this function and returning kFALSE under certain conditions. E.g. if there is a certain run type.
211 If you always need DCS data (like before), you do not need to implement it.
212
213 6) The run type has been added to the monitoring page
214
215 Revision 1.33  2007/04/03 13:56:01  acolla
216 Grid Storage at the end of preprocessing. Added virtual method to disable DCS query according to the
217 run type.
218
219 Revision 1.32  2007/02/28 10:41:56  acolla
220 Run type field added in SHUTTLE framework. Run type is read from "run type" logbook and retrieved by
221 AliPreprocessor::GetRunType() function.
222 Added some ldap definition files.
223
224 Revision 1.30  2007/02/13 11:23:21  acolla
225 Moved getters and setters of Shuttle's main OCDB/Reference, local
226 OCDB/Reference, temp and log folders to AliShuttleInterface
227
228 Revision 1.27  2007/01/30 17:52:42  jgrosseo
229 adding monalisa monitoring
230
231 Revision 1.26  2007/01/23 19:20:03  acolla
232 Removed old ldif files, added TOF, MCH ldif files. Added some options in
233 AliShuttleConfig::Print. Added in Ali Shuttle: SetShuttleTempDir and
234 SetShuttleLogDir
235
236 Revision 1.25  2007/01/15 19:13:52  acolla
237 Moved some AliInfo to AliDebug in SendMail function
238
239 Revision 1.21  2006/12/07 08:51:26  jgrosseo
240 update (alberto):
241 table, db names in ldap configuration
242 added GRP preprocessor
243 DCS data can also be retrieved by data point
244
245 Revision 1.20  2006/11/16 16:16:48  jgrosseo
246 introducing strict run ordering flag
247 removed giving preprocessor name to preprocessor, they have to know their name themselves ;-)
248
249 Revision 1.19  2006/11/06 14:23:04  jgrosseo
250 major update (Alberto)
251 o) reading of run parameters from the logbook
252 o) online offline naming conversion
253 o) standalone DCSclient package
254
255 Revision 1.18  2006/10/20 15:22:59  jgrosseo
256 o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
257 o) Merging Collect, CollectAll, CollectNew function
258 o) Removing implementation of empty copy constructors (declaration still there!)
259
260 Revision 1.17  2006/10/05 16:20:55  jgrosseo
261 adapting to new CDB classes
262
263 Revision 1.16  2006/10/05 15:46:26  jgrosseo
264 applying to the new interface
265
266 Revision 1.15  2006/10/02 16:38:39  jgrosseo
267 update (alberto):
268 fixed memory leaks
269 storing of objects that failed to be stored to the grid before
270 interfacing of shuttle status table in daq system
271
272 Revision 1.14  2006/08/29 09:16:05  jgrosseo
273 small update
274
275 Revision 1.13  2006/08/15 10:50:00  jgrosseo
276 effc++ corrections (alberto)
277
278 Revision 1.12  2006/08/08 14:19:29  jgrosseo
279 Update to shuttle classes (Alberto)
280
281 - Possibility to set the full object's path in the Preprocessor's and
282 Shuttle's  Store functions
283 - Possibility to extend the object's run validity in the same classes
284 ("startValidity" and "validityInfinite" parameters)
285 - Implementation of the StoreReferenceData function to store reference
286 data in a dedicated CDB storage.
287
288 Revision 1.11  2006/07/21 07:37:20  jgrosseo
289 last run is stored after each run
290
291 Revision 1.10  2006/07/20 09:54:40  jgrosseo
292 introducing status management: The processing per subdetector is divided into several steps,
293 after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
294 can keep track of the number of failures and skips further processing after a certain threshold is
295 exceeded. These thresholds can be configured in LDAP.
296
297 Revision 1.9  2006/07/19 10:09:55  jgrosseo
298 new configuration, accesst to DAQ FES (Alberto)
299
300 Revision 1.8  2006/07/11 12:44:36  jgrosseo
301 adding parameters for extended validity range of data produced by preprocessor
302
303 Revision 1.7  2006/07/10 14:37:09  jgrosseo
304 small fix + todo comment
305
306 Revision 1.6  2006/07/10 13:01:41  jgrosseo
307 enhanced storing of last sucessfully processed run (alberto)
308
309 Revision 1.5  2006/07/04 14:59:57  jgrosseo
310 revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
311
312 Revision 1.4  2006/06/12 09:11:16  jgrosseo
313 coding conventions (Alberto)
314
315 Revision 1.3  2006/06/06 14:26:40  jgrosseo
316 o) removed files that were moved to STEER
317 o) shuttle updated to follow the new interface (Alberto)
318
319 Revision 1.2  2006/03/07 07:52:34  hristov
320 New version (B.Yordanov)
321
322 Revision 1.6  2005/11/19 17:19:14  byordano
323 RetrieveDATEEntries and RetrieveConditionsData added
324
325 Revision 1.5  2005/11/19 11:09:27  byordano
326 AliShuttle declaration added
327
328 Revision 1.4  2005/11/17 17:47:34  byordano
329 TList changed to TObjArray
330
331 Revision 1.3  2005/11/17 14:43:23  byordano
332 import to local CVS
333
334 Revision 1.1.1.1  2005/10/28 07:33:58  hristov
335 Initial import as subdirectory in AliRoot
336
337 Revision 1.2  2005/09/13 08:41:15  byordano
338 default startTime endTime added
339
340 Revision 1.4  2005/08/30 09:13:02  byordano
341 some docs added
342
343 Revision 1.3  2005/08/29 21:15:47  byordano
344 some docs added
345
346 */
347
//
// This class is the main manager for AliShuttle.
// It organizes the data retrieval from DCS and calls the
// interface methods of AliPreprocessor.
// For every detector in AliShuttleConfig (see AliShuttleConfig),
// data for its set of aliases is retrieved. If there is a registered
// AliPreprocessor for this detector then it will be used
// according to the schema (see AliPreprocessor).
// If there isn't a registered AliPreprocessor then the retrieved
// data is stored automatically to the underlying AliCDBStorage.
// The alias name is used for detSpec.
//
360
361 #include "AliShuttle.h"
362
363 #include "AliCDBManager.h"
364 #include "AliCDBStorage.h"
365 #include "AliCDBId.h"
366 #include "AliCDBRunRange.h"
367 #include "AliCDBPath.h"
368 #include "AliCDBEntry.h"
369 #include "AliShuttleConfig.h"
370 #include "DCSClient/AliDCSClient.h"
371 #include "AliLog.h"
372 #include "AliPreprocessor.h"
373 #include "AliShuttleStatus.h"
374 #include "AliShuttleLogbookEntry.h"
375
376 #include <TSystem.h>
377 #include <TObject.h>
378 #include <TString.h>
379 #include <TTimeStamp.h>
380 #include <TObjString.h>
381 #include <TSQLServer.h>
382 #include <TSQLResult.h>
383 #include <TSQLRow.h>
384 #include <TMutex.h>
385 #include <TSystemDirectory.h>
386 #include <TSystemFile.h>
387 #include <TFile.h>
388 #include <TGrid.h>
389 #include <TGridResult.h>
390
391 #include <TMonaLisaWriter.h>
392
393 #include <fstream>
394
395 #include <sys/types.h>
396 #include <sys/wait.h>
397
398 ClassImp(AliShuttle)
399
400 //______________________________________________________________________________________________
401 AliShuttle::AliShuttle(const AliShuttleConfig* config,
402                 UInt_t timeout, Int_t retries):
403 fConfig(config),
404 fTimeout(timeout), fRetries(retries),
405 fPreprocessorMap(),
406 fLogbookEntry(0),
407 fCurrentDetector(),
408 fStatusEntry(0),
409 fMonitoringMutex(0),
410 fLastActionTime(0),
411 fLastAction(),
412 fMonaLisa(0),
413 fTestMode(kNone),
414 fReadTestMode(kFALSE),
415 fOutputRedirected(kFALSE)
416 {
417         //
418         // config: AliShuttleConfig used
419         // timeout: timeout used for AliDCSClient connection
420         // retries: the number of retries in case of connection error.
421         //
422
423         if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
424         for(int iSys=0;iSys<4;iSys++) {
425                 fServer[iSys]=0;
426                 if (iSys < 3)
427                         fFXSlist[iSys].SetOwner(kTRUE);
428         }
429         fPreprocessorMap.SetOwner(kTRUE);
430
431         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
432                 fFirstUnprocessed[iDet] = kFALSE;
433
434         fMonitoringMutex = new TMutex();
435 }
436
437 //______________________________________________________________________________________________
438 AliShuttle::~AliShuttle()
439 {
440         //
441         // destructor
442         //
443
444         fPreprocessorMap.DeleteAll();
445         for(int iSys=0;iSys<4;iSys++)
446                 if(fServer[iSys]) {
447                         fServer[iSys]->Close();
448                         delete fServer[iSys];
449                         fServer[iSys] = 0;
450                 }
451
452         if (fStatusEntry){
453                 delete fStatusEntry;
454                 fStatusEntry = 0;
455         }
456         
457         if (fMonitoringMutex) 
458         {
459                 delete fMonitoringMutex;
460                 fMonitoringMutex = 0;
461         }
462 }
463
464 //______________________________________________________________________________________________
465 void AliShuttle::RegisterPreprocessor(AliPreprocessor* preprocessor)
466 {
467         //
468         // Registers new AliPreprocessor.
469         // It uses GetName() for indentificator of the pre processor.
470         // The pre processor is registered it there isn't any other
471         // with the same identificator (GetName()).
472         //
473
474         const char* detName = preprocessor->GetName();
475         if(GetDetPos(detName) < 0)
476                 AliFatal(Form("********** !!!!! Invalid detector name: %s !!!!! **********", detName));
477
478         if (fPreprocessorMap.GetValue(detName)) {
479                 AliWarning(Form("AliPreprocessor %s is already registered!", detName));
480                 return;
481         }
482
483         fPreprocessorMap.Add(new TObjString(detName), preprocessor);
484 }
485 //______________________________________________________________________________________________
486 Bool_t AliShuttle::Store(const AliCDBPath& path, TObject* object,
487                 AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite)
488 {
489         // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for
490         // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored
491         // using this function. Use StoreReferenceData instead!
492         // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
493         // finishes the data are transferred to the main storage (Grid).
494
495         return StoreLocally(fgkLocalCDB, path, object, metaData, validityStart, validityInfinite);
496 }
497
498 //______________________________________________________________________________________________
499 Bool_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData)
500 {
501         // Stores a CDB object in the storage for reference data. This objects will not be available during
502         // offline reconstrunction. Use this function for reference data only!
503         // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
504         // finishes the data are transferred to the main storage (Grid).
505
506         return StoreLocally(fgkLocalRefStorage, path, object, metaData);
507 }
508
509 //______________________________________________________________________________________________
510 Bool_t AliShuttle::StoreLocally(const TString& localUri,
511                         const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
512                         Int_t validityStart, Bool_t validityInfinite)
513 {
514         // Store object temporarily in local storage. Parameters are passed by Store and StoreReferenceData functions.
515         // when the preprocessor finishes the data are transferred to the main storage (Grid).
516         // The parameters are:
517         //   1) Uri of the backup storage (Local)
518         //   2) the object's path.
519         //   3) the object to be stored
520         //   4) the metaData to be associated with the object
521         //   5) the validity start run number w.r.t. the current run,
522         //      if the data is valid only for this run leave the default 0
523         //   6) specifies if the calibration data is valid for infinity (this means until updated),
524         //      typical for calibration runs, the default is kFALSE
525         //
526         // returns 0 if fail, 1 otherwise
527
528         if (fTestMode & kErrorStorage)
529         {
530                 Log(fCurrentDetector, "StoreLocally - In TESTMODE - Simulating error while storing locally");
531                 return kFALSE;
532         }
533         
534         const char* cdbType = (localUri == fgkLocalCDB) ? "CDB" : "Reference";
535
536         Int_t firstRun = GetCurrentRun() - validityStart;
537         if(firstRun < 0) {
538                 AliWarning("First valid run happens to be less than 0! Setting it to 0.");
539                 firstRun=0;
540         }
541
542         Int_t lastRun = -1;
543         if(validityInfinite) {
544                 lastRun = AliCDBRunRange::Infinity();
545         } else {
546                 lastRun = GetCurrentRun();
547         }
548
549         // Version is set to current run, it will be used later to transfer data to Grid
550         AliCDBId id(path, firstRun, lastRun, GetCurrentRun(), -1);
551
552         if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){
553                 TObjString runUsed = Form("%d", GetCurrentRun());
554                 metaData->SetProperty("RunUsed(TObjString)", runUsed.Clone());
555         }
556
557         Bool_t result = kFALSE;
558
559         if (!(AliCDBManager::Instance()->GetStorage(localUri))) {
560                 Log("SHUTTLE", Form("StoreLocally - Cannot activate local %s storage", cdbType));
561         } else {
562                 result = AliCDBManager::Instance()->GetStorage(localUri)
563                                         ->Put(object, id, metaData);
564         }
565
566         if(!result) {
567
568                 Log(fCurrentDetector, Form("StoreLocally - Can't store object <%s>!", id.ToString().Data()));
569         }
570
571         return result;
572 }
573
574 //______________________________________________________________________________________________
575 Bool_t AliShuttle::StoreOCDB()
576 {
577         //
578         // Called when preprocessor ends successfully or when previous storage attempt failed (kStoreError status)
579         // Calls underlying StoreOCDB(const char*) function twice, for OCDB and Reference storage.
580         // Then calls StoreRefFilesToGrid to store reference files. 
581         //
582         
583         if (fTestMode & kErrorGrid)
584         {
585                 Log("SHUTTLE", "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
586                 Log(fCurrentDetector, "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
587                 return kFALSE;
588         }
589         
590         Log("SHUTTLE","StoreOCDB - Storing OCDB data ...");
591         Bool_t resultCDB = StoreOCDB(fgkMainCDB);
592
593         Log("SHUTTLE","StoreOCDB - Storing reference data ...");
594         Bool_t resultRef = StoreOCDB(fgkMainRefStorage);
595         
596         Log("SHUTTLE","StoreOCDB - Storing reference files ...");
597         Bool_t resultRefFiles = CopyFilesToGrid("reference");
598         
599         Bool_t resultMetadata = kTRUE;
600         if(fCurrentDetector == "GRP") 
601         {
602                 Log("StoreOCDB - SHUTTLE","Storing Run Metadata file ...");
603                 resultMetadata = CopyFilesToGrid("metadata");
604         }
605         
606         return resultCDB && resultRef && resultRefFiles && resultMetadata;
607 }
608
609 //______________________________________________________________________________________________
610 Bool_t AliShuttle::StoreOCDB(const TString& gridURI)
611 {
612         //
613         // Called by StoreOCDB(), performs actual storage to the main OCDB and reference storages (Grid)
614         //
615
616         TObjArray* gridIds=0;
617
618         Bool_t result = kTRUE;
619
620         const char* type = 0;
621         TString localURI;
622         if(gridURI == fgkMainCDB) {
623                 type = "OCDB";
624                 localURI = fgkLocalCDB;
625         } else if(gridURI == fgkMainRefStorage) {
626                 type = "reference";
627                 localURI = fgkLocalRefStorage;
628         } else {
629                 AliError(Form("Invalid storage URI: %s", gridURI.Data()));
630                 return kFALSE;
631         }
632
633         AliCDBManager* man = AliCDBManager::Instance();
634
635         AliCDBStorage *gridSto = man->GetStorage(gridURI);
636         if(!gridSto) {
637                 Log("SHUTTLE",
638                         Form("StoreOCDB - cannot activate main %s storage", type));
639                 return kFALSE;
640         }
641
642         gridIds = gridSto->GetQueryCDBList();
643
644         // get objects previously stored in local CDB
645         AliCDBStorage *localSto = man->GetStorage(localURI);
646         if(!localSto) {
647                 Log("SHUTTLE",
648                         Form("StoreOCDB - cannot activate local %s storage", type));
649                 return kFALSE;
650         }
651         AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*");
652         // Local objects were stored with current run as Grid version!
653         TList* localEntries = localSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun());
654         localEntries->SetOwner(1);
655
656         // loop on local stored objects
657         TIter localIter(localEntries);
658         AliCDBEntry *aLocEntry = 0;
659         while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){
660                 aLocEntry->SetOwner(1);
661                 AliCDBId aLocId = aLocEntry->GetId();
662                 aLocEntry->SetVersion(-1);
663                 aLocEntry->SetSubVersion(-1);
664
665                 // If local object is valid up to infinity we store it only if it is
666                 // the first unprocessed run!
667                 if (aLocId.GetLastRun() == AliCDBRunRange::Infinity() &&
668                         !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
669                 {
670                         Log("SHUTTLE", Form("StoreOCDB - %s: object %s has validity infinite but "
671                                                 "there are previous unprocessed runs!",
672                                                 fCurrentDetector.Data(), aLocId.GetPath().Data()));
673                         result = kFALSE;
674                         continue;
675                 }
676
677                 // loop on Grid valid Id's
678                 Bool_t store = kTRUE;
679                 TIter gridIter(gridIds);
680                 AliCDBId* aGridId = 0;
681                 while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
682                         if(aGridId->GetPath() != aLocId.GetPath()) continue;
683                         // skip all objects valid up to infinity
684                         if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
685                         // if we get here, it means there's already some more recent object stored on Grid!
686                         store = kFALSE;
687                         break;
688                 }
689
690                 // If we get here, the file can be stored!
691                 Bool_t storeOk = gridSto->Put(aLocEntry);
692                 if(!store || storeOk){
693
694                         if (!store)
695                         {
696                                 Log(fCurrentDetector.Data(),
697                                         Form("StoreOCDB - A more recent object already exists in %s storage: <%s>",
698                                                 type, aGridId->ToString().Data()));
699                         } else {
700                                 Log("SHUTTLE",
701                                         Form("StoreOCDB - Object <%s> successfully put into %s storage",
702                                                 aLocId.ToString().Data(), type));
703                                 Log(fCurrentDetector.Data(),
704                                         Form("StoreOCDB - Object <%s> successfully put into %s storage",
705                                                 aLocId.ToString().Data(), type));
706                         }
707
708                         // removing local filename...
709                         TString filename;
710                         localSto->IdToFilename(aLocId, filename);
711                         Log("SHUTTLE", Form("StoreOCDB - Removing local file %s", filename.Data()));
712                         RemoveFile(filename.Data());
713                         continue;
714                 } else  {
715                         Log("SHUTTLE",
716                                 Form("StoreOCDB - Grid %s storage of object <%s> failed",
717                                         type, aLocId.ToString().Data()));
718                         Log(fCurrentDetector.Data(),
719                                 Form("StoreOCDB - Grid %s storage of object <%s> failed",
720                                         type, aLocId.ToString().Data()));
721                         result = kFALSE;
722                 }
723         }
724         localEntries->Clear();
725
726         return result;
727 }
728
729 //______________________________________________________________________________________________
730 Bool_t AliShuttle::CleanReferenceStorage(const char* detector)
731 {
732         // clears the directory used to store reference files of a given subdetector
733   
734         AliCDBManager* man = AliCDBManager::Instance();
735         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
736         TString localBaseFolder = sto->GetBaseFolder();
737
738         TString targetDir = GetRefFilePrefix(localBaseFolder.Data(), detector);
739         
740         Log("SHUTTLE", Form("CleanReferenceStorage - Cleaning %s", targetDir.Data()));
741
742         TString begin;
743         begin.Form("%d_", GetCurrentRun());
744         
745         TSystemDirectory* baseDir = new TSystemDirectory("/", targetDir);
746         if (!baseDir)
747                 return kTRUE;
748                 
749         TList* dirList = baseDir->GetListOfFiles();
750         delete baseDir;
751         
752         if (!dirList) return kTRUE;
753                         
754         if (dirList->GetEntries() < 3) 
755         {
756                 delete dirList;
757                 return kTRUE;
758         }
759                                 
760         Int_t nDirs = 0, nDel = 0;
761         TIter dirIter(dirList);
762         TSystemFile* entry = 0;
763
764         Bool_t success = kTRUE;
765         
766         while ((entry = dynamic_cast<TSystemFile*> (dirIter.Next())))
767         {                                       
768                 if (entry->IsDirectory())
769                         continue;
770                 
771                 TString fileName(entry->GetName());
772                 if (!fileName.BeginsWith(begin))
773                         continue;
774                         
775                 nDirs++;
776                                                 
777                 // delete file
778                 Int_t result = gSystem->Unlink(fileName.Data());
779                 
780                 if (result)
781                 {
782                         Log("SHUTTLE", Form("CleanReferenceStorage - Could not delete file %s!", fileName.Data()));
783                         success = kFALSE;
784                 } else {
785                         nDel++;
786                 }
787         }
788
789         if(nDirs > 0)
790                 Log("SHUTTLE", Form("CleanReferenceStorage - %d (over %d) reference files in folder %s were deleted.", 
791                         nDel, nDirs, targetDir.Data()));
792
793                 
794         delete dirList;
795         return success;
796
797
798
799
800
801
802   Int_t result = gSystem->GetPathInfo(targetDir, 0, (Long64_t*) 0, 0, 0);
803   if (result == 0)
804   {
805     // delete directory
806     result = gSystem->Exec(Form("rm -rf %s", targetDir.Data()));
807     if (result != 0)
808     {  
809       Log("SHUTTLE", Form("CleanReferenceStorage - Could not clean directory %s", targetDir.Data()));
810       return kFALSE;
811     }
812   }
813
814   result = gSystem->mkdir(targetDir, kTRUE);
815   if (result != 0)
816   {
817     Log("SHUTTLE", Form("CleanReferenceStorage - Error creating base directory %s", targetDir.Data()));
818     return kFALSE;
819   }
820         
821   return kTRUE;
822 }
823
824 //______________________________________________________________________________________________
825 Bool_t AliShuttle::StoreReferenceFile(const char* detector, const char* localFile, const char* gridFileName)
826 {
827         //
828         // Stores reference file directly (without opening it). This function stores the file locally.
829         //
830         // The file is stored under the following location: 
831         // <base folder of local reference storage>/<DET>/<RUN#>_<gridFileName>
832         // where <gridFileName> is the second parameter given to the function
833         // 
834         
835         if (fTestMode & kErrorStorage)
836         {
837                 Log(fCurrentDetector, "StoreReferenceFile - In TESTMODE - Simulating error while storing locally");
838                 return kFALSE;
839         }
840         
841         AliCDBManager* man = AliCDBManager::Instance();
842         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
843         
844         TString localBaseFolder = sto->GetBaseFolder();
845         
846         TString target = GetRefFilePrefix(localBaseFolder.Data(), detector);    
847         target.Append(Form("/%d_%s", GetCurrentRun(), gridFileName));
848         
849         return CopyFileLocally(localFile, target);
850 }
851
852 //______________________________________________________________________________________________
853 Bool_t AliShuttle::StoreRunMetadataFile(const char* localFile, const char* gridFileName)
854 {
855         //
856         // Stores Run metadata file to the Grid, in the run folder
857         //
858         // Only GRP can call this function.
859         
860         if (fTestMode & kErrorStorage)
861         {
862                 Log(fCurrentDetector, "StoreRunMetaDataFile - In TESTMODE - Simulating error while storing locally");
863                 return kFALSE;
864         }
865         
866         AliCDBManager* man = AliCDBManager::Instance();
867         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
868         
869         TString localBaseFolder = sto->GetBaseFolder();
870         
871         // Build Run level folder
872         // folder = /alice/data/year/lhcPeriod/runNb/raw
873         
874                 
875         TString lhcPeriod = GetLHCPeriod();     
876         if (lhcPeriod.Length() == 0) 
877         {
878                 Log("SHUTTLE","StoreRunMetaDataFile - LHCPeriod not found in logbook!");
879                 return 0;
880         }
881         
882         // TODO partitions with one detector only write data into LHCperiod_DET
883         TString partition = GetRunParameter("partition");
884         
885         if (partition.Length() > 0 && partition != "ALICE")
886         {
887                 lhcPeriod.Append(Form("_%s", partition.Data()));
888                 Log(fCurrentDetector, Form("Run data tags merged file will be written in %s", 
889                                 lhcPeriod.Data()));
890         }
891                 
892         TString target = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/raw/%s", 
893                                 localBaseFolder.Data(), GetCurrentYear(), 
894                                 lhcPeriod.Data(), GetCurrentRun(), gridFileName);
895                                         
896         return CopyFileLocally(localFile, target);
897 }
898
899 //______________________________________________________________________________________________
900 Bool_t AliShuttle::CopyFileLocally(const char* localFile, const TString& target)
901 {
902         //
903         // Stores file locally. Called by StoreReferenceFile and StoreRunMetadataFile
904         // Files are temporarily stored in the local reference storage. When the preprocessor 
905         // finishes, the Shuttle calls CopyFilesToGrid to transfer the files to AliEn 
906         // (in reference or run level folders)
907         //
908         
909         TString targetDir(target(0, target.Last('/')));
910         
911         //try to open base dir folder, if it does not exist
912         void* dir = gSystem->OpenDirectory(targetDir.Data());
913         if (dir == NULL) {
914                 if (gSystem->mkdir(targetDir.Data(), kTRUE)) {
915                         Log("SHUTTLE", Form("CopyFileLocally - Can't open directory <%s>", targetDir.Data()));
916                         return kFALSE;
917                 }
918
919         } else {
920                 gSystem->FreeDirectory(dir);
921         }
922         
923         Int_t result = 0;
924         
925         result = gSystem->GetPathInfo(localFile, 0, (Long64_t*) 0, 0, 0);
926         if (result)
927         {
928                 Log("SHUTTLE", Form("CopyFileLocally - %s does not exist", localFile));
929                 return kFALSE;
930         }
931
932         result = gSystem->GetPathInfo(target, 0, (Long64_t*) 0, 0, 0);
933         if (!result)
934         {
935                 Log("SHUTTLE", Form("CopyFileLocally - target file %s already exist, removing...", target.Data()));
936                 if (gSystem->Unlink(target.Data()))
937                 {
938                         Log("SHUTTLE", Form("CopyFileLocally - Could not remove existing target file %s!", target.Data()));
939                         return kFALSE;
940                 }
941         }       
942         
943         result = gSystem->CopyFile(localFile, target);
944
945         if (result == 0)
946         {
947                 Log("SHUTTLE", Form("CopyFileLocally - File %s stored locally to %s", localFile, target.Data()));
948                 return kTRUE;
949         }
950         else
951         {
952                 Log("SHUTTLE", Form("CopyFileLocally - Could not store file %s to %s! Error code = %d", 
953                                 localFile, target.Data(), result));
954                 return kFALSE;
955         }       
956
957
958
959 }
960
961 //______________________________________________________________________________________________
962 Bool_t AliShuttle::CopyFilesToGrid(const char* type)
963 {
964         //
965         // Transfers local files to the Grid. Local files can be reference files 
966         // or run metadata file (from GRP only).
967         //
968         // According to the type (ref, metadata) the files are stored under the following location: 
969         // ref --> <base folder of reference storage>/<DET>/<RUN#>_<gridFileName>
970         // metadata --> <run data folder>/<MetadataFileName>
971         //
972                 
973         AliCDBManager* man = AliCDBManager::Instance();
974         AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
975         if (!sto)
976                 return kFALSE;
977         TString localBaseFolder = sto->GetBaseFolder();
978         
979         TString dir;
980         TString alienDir;
981         TString begin;
982         
983         if (strcmp(type, "reference") == 0) 
984         {
985                 dir = GetRefFilePrefix(localBaseFolder.Data(), fCurrentDetector.Data());
986                 AliCDBStorage* gridSto = man->GetStorage(fgkMainRefStorage);
987                 if (!gridSto)
988                         return kFALSE;
989                 TString gridBaseFolder = gridSto->GetBaseFolder();
990                 alienDir = GetRefFilePrefix(gridBaseFolder.Data(), fCurrentDetector.Data());
991                 begin = Form("%d_", GetCurrentRun());
992         } 
993         else if (strcmp(type, "metadata") == 0)
994         {
995                         
996                 TString lhcPeriod = GetLHCPeriod();
997         
998                 if (lhcPeriod.Length() == 0) 
999                 {
1000                         Log("SHUTTLE","CopyFilesToGrid - LHCPeriod not found in logbook!");
1001                         return 0;
1002                 }
1003                 
1004                 // TODO partitions with one detector only write data into LHCperiod_DET
1005                 TString partition = GetRunParameter("partition");
1006         
1007                 if (partition.Length() > 0 && partition != "ALICE")
1008                 {
1009                         lhcPeriod.Append(Form("_%s", partition.Data()));
1010                 }
1011                 
1012                 dir = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/raw", 
1013                                 localBaseFolder.Data(), GetCurrentYear(), 
1014                                 lhcPeriod.Data(), GetCurrentRun());
1015                 alienDir = dir(dir.Index("/alice/data/"), dir.Length());
1016                 
1017                 begin = "";
1018         }
1019         else 
1020         {
1021                 Log("SHUTTLE", "CopyFilesToGrid - Unexpected: type label must be reference or metadata!");
1022                 return kFALSE;
1023         }
1024                 
1025         TSystemDirectory* baseDir = new TSystemDirectory("/", dir);
1026         if (!baseDir)
1027                 return kTRUE;
1028                 
1029         TList* dirList = baseDir->GetListOfFiles();
1030         delete baseDir;
1031         
1032         if (!dirList) return kTRUE;
1033                 
1034         if (dirList->GetEntries() < 3) 
1035         {
1036                 delete dirList;
1037                 return kTRUE;
1038         }
1039                         
1040         if (!gGrid)
1041         { 
1042                 Log("SHUTTLE", "CopyFilesToGrid - Connection to Grid failed: Cannot continue!");
1043                 delete dirList;
1044                 return kFALSE;
1045         }
1046         
1047         Int_t nDirs = 0, nTransfer = 0;
1048         TIter dirIter(dirList);
1049         TSystemFile* entry = 0;
1050
1051         Bool_t success = kTRUE;
1052         Bool_t first = kTRUE;
1053         
1054         while ((entry = dynamic_cast<TSystemFile*> (dirIter.Next())))
1055         {                       
1056                 if (entry->IsDirectory())
1057                         continue;
1058                         
1059                 TString fileName(entry->GetName());
1060                 if (!fileName.BeginsWith(begin))
1061                         continue;
1062                         
1063                 nDirs++;
1064                         
1065                 if (first)
1066                 {
1067                         first = kFALSE;
1068                         // check that folder exists, otherwise create it
1069                         TGridResult* result = gGrid->Ls(alienDir.Data(), "a");
1070                         
1071                         if (!result)
1072                         {
1073                                 delete dirList;
1074                                 return kFALSE;
1075                         }
1076                         
1077                         if (!result->GetFileName(1)) // TODO: It looks like element 0 is always 0!!
1078                         {
1079                                 // TODO It does not work currently! Bug in TAliEn::Mkdir
1080                                 // TODO Manually fixed in local root v5-16-00
1081                                 if (!gGrid->Mkdir(alienDir.Data(),"-p",0))
1082                                 {
1083                                         Log("SHUTTLE", Form("CopyFilesToGrid - Cannot create directory %s",
1084                                                         alienDir.Data()));
1085                                         delete dirList;
1086                                         return kFALSE;
1087                                 } else {
1088                                         Log("SHUTTLE",Form("CopyFilesToGrid - Folder %s created", alienDir.Data()));
1089                                 }
1090                                 
1091                         } else {
1092                                         Log("SHUTTLE",Form("CopyFilesToGrid - Folder %s found", alienDir.Data()));
1093                         }
1094                 }
1095                         
1096                 TString fullLocalPath;
1097                 fullLocalPath.Form("%s/%s", dir.Data(), fileName.Data());
1098                 
1099                 TString fullGridPath;
1100                 fullGridPath.Form("alien://%s/%s", alienDir.Data(), fileName.Data());
1101
1102                 Bool_t result = TFile::Cp(fullLocalPath, fullGridPath);
1103                 
1104                 if (result)
1105                 {
1106                         Log("SHUTTLE", Form("CopyFilesToGrid - Copying local file %s to %s succeeded!", 
1107                                                 fullLocalPath.Data(), fullGridPath.Data()));
1108                         RemoveFile(fullLocalPath);
1109                         nTransfer++;
1110                 }
1111                 else
1112                 {
1113                         Log("SHUTTLE", Form("CopyFilesToGrid - Copying local file %s to %s FAILED!", 
1114                                                 fullLocalPath.Data(), fullGridPath.Data()));
1115                         success = kFALSE;
1116                 }
1117         }
1118
1119         Log("SHUTTLE", Form("CopyFilesToGrid - %d (over %d) files in folder %s copied to Grid.", 
1120                                                 nTransfer, nDirs, dir.Data()));
1121
1122                 
1123         delete dirList;
1124         return success;
1125 }
1126
1127 //______________________________________________________________________________________________
1128 const char* AliShuttle::GetRefFilePrefix(const char* base, const char* detector)
1129 {
1130         //
1131         // Get folder name of reference files 
1132         //
1133
1134         TString offDetStr(GetOfflineDetName(detector));
1135         TString dir;
1136         if (offDetStr == "ITS" || offDetStr == "MUON" || offDetStr == "PHOS")
1137         {
1138                 dir.Form("%s/%s/%s", base, offDetStr.Data(), detector);
1139         } else {
1140                 dir.Form("%s/%s", base, offDetStr.Data());
1141         }
1142         
1143         return dir.Data();
1144         
1145
1146 }
1147
1148 //______________________________________________________________________________________________
1149 void AliShuttle::CleanLocalStorage(const TString& uri)
1150 {
1151         //
1152         // Called in case the preprocessor is declared failed. Remove remaining objects from the local storages.
1153         //
1154
1155         const char* type = 0;
1156         if(uri == fgkLocalCDB) {
1157                 type = "OCDB";
1158         } else if(uri == fgkLocalRefStorage) {
1159                 type = "Reference";
1160         } else {
1161                 AliError(Form("Invalid storage URI: %s", uri.Data()));
1162                 return;
1163         }
1164
1165         AliCDBManager* man = AliCDBManager::Instance();
1166
1167         // open local storage
1168         AliCDBStorage *localSto = man->GetStorage(uri);
1169         if(!localSto) {
1170                 Log("SHUTTLE",
1171                         Form("CleanLocalStorage - cannot activate local %s storage", type));
1172                 return;
1173         }
1174
1175         TString filename(Form("%s/%s/*/Run*_v%d_s*.root",
1176                 localSto->GetBaseFolder().Data(), GetOfflineDetName(fCurrentDetector.Data()), GetCurrentRun()));
1177
1178         AliDebug(2, Form("filename = %s", filename.Data()));
1179
1180         Log("SHUTTLE", Form("Removing remaining local files for run %d and detector %s ...",
1181                 GetCurrentRun(), fCurrentDetector.Data()));
1182
1183         RemoveFile(filename.Data());
1184
1185 }
1186
1187 //______________________________________________________________________________________________
1188 void AliShuttle::RemoveFile(const char* filename)
1189 {
1190         //
1191         // removes local file
1192         //
1193
1194         TString command(Form("rm -f %s", filename));
1195
1196         Int_t result = gSystem->Exec(command.Data());
1197         if(result != 0)
1198         {
1199                 Log("SHUTTLE", Form("RemoveFile - %s: Cannot remove file %s!",
1200                         fCurrentDetector.Data(), filename));
1201         }
1202 }
1203
1204 //______________________________________________________________________________________________
1205 AliShuttleStatus* AliShuttle::ReadShuttleStatus()
1206 {
1207         //
1208         // Reads the AliShuttleStatus from the CDB
1209         //
1210
1211         if (fStatusEntry){
1212                 delete fStatusEntry;
1213                 fStatusEntry = 0;
1214         }
1215
1216         fStatusEntry = AliCDBManager::Instance()->GetStorage(GetLocalCDB())
1217                 ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
1218
1219         if (!fStatusEntry) return 0;
1220         fStatusEntry->SetOwner(1);
1221
1222         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1223         if (!status) {
1224                 AliError("Invalid object stored to CDB!");
1225                 return 0;
1226         }
1227
1228         return status;
1229 }
1230
1231 //______________________________________________________________________________________________
1232 Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status)
1233 {
1234         //
1235         // writes the status for one subdetector
1236         //
1237
1238         if (fStatusEntry){
1239                 delete fStatusEntry;
1240                 fStatusEntry = 0;
1241         }
1242
1243         Int_t run = GetCurrentRun();
1244
1245         AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
1246
1247         fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
1248         fStatusEntry->SetOwner(1);
1249
1250         UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
1251
1252         if (!result) {
1253                 Log("SHUTTLE", Form("WriteShuttleStatus - Failed for %s, run %d",
1254                                                 fCurrentDetector.Data(), run));
1255                 return kFALSE;
1256         }
1257         
1258         SendMLInfo();
1259
1260         return kTRUE;
1261 }
1262
1263 //______________________________________________________________________________________________
1264 void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount)
1265 {
1266         //
1267         // changes the AliShuttleStatus for the given detector and run to the given status
1268         //
1269
1270         if (!fStatusEntry){
1271                 AliError("UNEXPECTED: fStatusEntry empty");
1272                 return;
1273         }
1274
1275         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1276
1277         if (!status){
1278                 Log("SHUTTLE", "UpdateShuttleStatus - UNEXPECTED: status could not be read from current CDB entry");
1279                 return;
1280         }
1281
1282         TString actionStr = Form("UpdateShuttleStatus - %s: Changing state from %s to %s",
1283                                 fCurrentDetector.Data(),
1284                                 status->GetStatusName(),
1285                                 status->GetStatusName(newStatus));
1286         Log("SHUTTLE", actionStr);
1287         SetLastAction(actionStr);
1288
1289         status->SetStatus(newStatus);
1290         if (increaseCount) status->IncreaseCount();
1291
1292         AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
1293
1294         SendMLInfo();
1295 }
1296
1297 //______________________________________________________________________________________________
1298 void AliShuttle::SendMLInfo()
1299 {
1300         //
1301         // sends ML information about the current status of the current detector being processed
1302         //
1303         
1304         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1305         
1306         if (!status){
1307                 Log("SHUTTLE", "SendMLInfo - UNEXPECTED: status could not be read from current CDB entry");
1308                 return;
1309         }
1310         
1311         TMonaLisaText  mlStatus(Form("%s_status", fCurrentDetector.Data()), status->GetStatusName());
1312         TMonaLisaValue mlRetryCount(Form("%s_count", fCurrentDetector.Data()), status->GetCount());
1313
1314         TList mlList;
1315         mlList.Add(&mlStatus);
1316         mlList.Add(&mlRetryCount);
1317
1318         TString mlID;
1319         mlID.Form("%d", GetCurrentRun());
1320         fMonaLisa->SendParameters(&mlList, mlID);
1321 }
1322
1323 //______________________________________________________________________________________________
1324 Bool_t AliShuttle::ContinueProcessing()
1325 {
1326         // this function reads the AliShuttleStatus information from CDB and
1327         // checks if the processing should be continued
1328         // if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
1329
1330         if (!fConfig->HostProcessDetector(fCurrentDetector)) return kFALSE;
1331
1332         AliPreprocessor* aPreprocessor =
1333                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
1334         if (!aPreprocessor)
1335         {
1336                 Log("SHUTTLE", Form("ContinueProcessing - %s: no preprocessor registered", fCurrentDetector.Data()));
1337                 return kFALSE;
1338         }
1339
1340         AliShuttleLogbookEntry::Status entryStatus =
1341                 fLogbookEntry->GetDetectorStatus(fCurrentDetector);
1342
1343         if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
1344                 Log("SHUTTLE", Form("ContinueProcessing - %s is %s",
1345                                 fCurrentDetector.Data(),
1346                                 fLogbookEntry->GetDetectorStatusName(entryStatus)));
1347                 return kFALSE;
1348         }
1349
1350         // if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state
1351
1352         // check if current run is first unprocessed run for current detector
1353         if (fConfig->StrictRunOrder(fCurrentDetector) &&
1354                 !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
1355         {
1356                 if (fTestMode == kNone)
1357                 {
1358                         Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering"
1359                                         " but this is not the first unprocessed run!"));
1360                         return kFALSE;
1361                 }
1362                 else
1363                 {
1364                         Log("SHUTTLE", Form("ContinueProcessing - In TESTMODE - "
1365                                         "Although %s requires strict run ordering "
1366                                         "and this is not the first unprocessed run, "
1367                                         "the SHUTTLE continues"));
1368                 }
1369         }
1370
1371         AliShuttleStatus* status = ReadShuttleStatus();
1372         if (!status) {
1373                 // first time
1374                 Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
1375                                 fCurrentDetector.Data()));
1376                 status = new AliShuttleStatus(AliShuttleStatus::kStarted);
1377                 return WriteShuttleStatus(status);
1378         }
1379
1380         // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
1381         // If it happens it may mean Logbook updating failed... let's do it now!
1382         if (status->GetStatus() == AliShuttleStatus::kDone ||
1383             status->GetStatus() == AliShuttleStatus::kFailed){
1384                 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
1385                                         fCurrentDetector.Data(),
1386                                         status->GetStatusName(status->GetStatus())));
1387                 UpdateShuttleLogbook(fCurrentDetector.Data(),
1388                                         status->GetStatusName(status->GetStatus()));
1389                 return kFALSE;
1390         }
1391
1392         if (status->GetStatus() == AliShuttleStatus::kStoreError) {
1393                 Log("SHUTTLE",
1394                         Form("ContinueProcessing - %s: Grid storage of one or more "
1395                                 "objects failed. Trying again now",
1396                                 fCurrentDetector.Data()));
1397                 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
1398                 if (StoreOCDB()){
1399                         Log("SHUTTLE", Form("ContinueProcessing - %s: all objects "
1400                                 "successfully stored into main storage",
1401                                 fCurrentDetector.Data()));
1402                 } else {
1403                         Log("SHUTTLE",
1404                                 Form("ContinueProcessing - %s: Grid storage failed again",
1405                                         fCurrentDetector.Data()));
1406                         UpdateShuttleStatus(AliShuttleStatus::kStoreError);
1407                 }
1408                 return kFALSE;
1409         }
1410
1411         // if we get here, there is a restart
1412         Bool_t cont = kFALSE;
1413
1414         // abort conditions
1415         if (status->GetCount() >= fConfig->GetMaxRetries()) {
1416                 Log("SHUTTLE", Form("ContinueProcessing - %s failed %d times in status %s - "
1417                                 "Updating Shuttle Logbook", fCurrentDetector.Data(),
1418                                 status->GetCount(), status->GetStatusName()));
1419                 UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
1420                 UpdateShuttleStatus(AliShuttleStatus::kFailed);
1421
1422                 // there may still be objects in local OCDB and reference storage
1423                 // and FXS databases may be not updated: do it now!
1424                 
1425                 // TODO Currently disabled, we want to keep files in case of failure!
1426                 // CleanLocalStorage(fgkLocalCDB);
1427                 // CleanLocalStorage(fgkLocalRefStorage);
1428                 // UpdateTableFailCase();
1429                 
1430                 // Send mail to detector expert!
1431                 Log("SHUTTLE", Form("ContinueProcessing - Sending mail to %s expert...", 
1432                                         fCurrentDetector.Data()));
1433                 if (!SendMail())
1434                         Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert",
1435                                         fCurrentDetector.Data()));
1436
1437         } else {
1438                 Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. "
1439                                 "Aborted before with %s. Retry number %d.", fCurrentDetector.Data(),
1440                                 status->GetStatusName(), status->GetCount()));
1441                 Bool_t increaseCount = kTRUE;
1442                 if (status->GetStatus() == AliShuttleStatus::kDCSError || 
1443                         status->GetStatus() == AliShuttleStatus::kDCSStarted)
1444                                 increaseCount = kFALSE;
1445                                 
1446                 UpdateShuttleStatus(AliShuttleStatus::kStarted, increaseCount);
1447                 cont = kTRUE;
1448         }
1449
1450         return cont;
1451 }
1452
1453 //______________________________________________________________________________________________
1454 Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
1455 {
1456         //
1457         // Makes data retrieval for all detectors in the configuration.
1458         // entry: Shuttle logbook entry, contains run paramenters and status of detectors
1459         // (Unprocessed, Inactive, Failed or Done).
1460         // Returns kFALSE in case of error occured and kTRUE otherwise
1461         //
1462
1463         if (!entry) return kFALSE;
1464
1465         fLogbookEntry = entry;
1466
1467         Log("SHUTTLE", Form("\t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^*",
1468                                         GetCurrentRun()));
1469
1470         // Send the information to ML
1471         TMonaLisaText  mlStatus("SHUTTLE_status", "Processing");
1472         TMonaLisaText  mlRunType("SHUTTLE_runtype", Form("%s (%s)", entry->GetRunType(), entry->GetRunParameter("log")));
1473
1474         TList mlList;
1475         mlList.Add(&mlStatus);
1476         mlList.Add(&mlRunType);
1477
1478         TString mlID;
1479         mlID.Form("%d", GetCurrentRun());
1480         fMonaLisa->SendParameters(&mlList, mlID);
1481
1482         if (fLogbookEntry->IsDone())
1483         {
1484                 Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook");
1485                 UpdateShuttleLogbook("shuttle_done");
1486                 fLogbookEntry = 0;
1487                 return kTRUE;
1488         }
1489
1490         // read test mode if flag is set
1491         if (fReadTestMode)
1492         {
1493                 fTestMode = kNone;
1494                 TString logEntry(entry->GetRunParameter("log"));
1495                 //printf("log entry = %s\n", logEntry.Data());
1496                 TString searchStr("Testmode: ");
1497                 Int_t pos = logEntry.Index(searchStr.Data());
1498                 //printf("%d\n", pos);
1499                 if (pos >= 0)
1500                 {
1501                         TSubString subStr = logEntry(pos + searchStr.Length(), logEntry.Length());
1502                         //printf("%s\n", subStr.String().Data());
1503                         TString newStr(subStr.Data());
1504                         TObjArray* token = newStr.Tokenize(' ');
1505                         if (token)
1506                         {
1507                                 //token->Print();
1508                                 TObjString* tmpStr = dynamic_cast<TObjString*> (token->First());
1509                                 if (tmpStr)
1510                                 {
1511                                         Int_t testMode = tmpStr->String().Atoi();
1512                                         if (testMode > 0)
1513                                         {
1514                                                 Log("SHUTTLE", Form("Process - Enabling test mode %d", testMode));
1515                                                 SetTestMode((TestMode) testMode);
1516                                         }
1517                                 }
1518                                 delete token;          
1519                         }
1520                 }
1521         }
1522                 
1523         fLogbookEntry->Print("all");
1524
1525         // Initialization
1526         Bool_t hasError = kFALSE;
1527
1528         // Set the CDB and Reference folders according to the year and LHC period
1529         TString lhcPeriod(GetLHCPeriod());
1530         if (lhcPeriod.Length() == 0) 
1531         {
1532                 Log("SHUTTLE","Process - LHCPeriod not found in logbook!");
1533                 return 0; 
1534         }       
1535         
1536         if (fgkMainCDB.Length() == 0)
1537                 fgkMainCDB = Form("alien://folder=/alice/data/%d/%s/OCDB?user=alidaq?cacheFold=/tmp/OCDBCache", 
1538                                         GetCurrentYear(), lhcPeriod.Data());
1539         
1540         if (fgkMainRefStorage.Length() == 0)
1541                 fgkMainRefStorage = Form("alien://folder=/alice/data/%d/%s/Reference?user=alidaq?cacheFold=/tmp/OCDBCache", 
1542                                         GetCurrentYear(), lhcPeriod.Data());
1543         
1544         // Loop on detectors in the configuration
1545         TIter iter(fConfig->GetDetectors());
1546         TObjString* aDetector = 0;
1547
1548         Bool_t first = kTRUE;
1549
1550         while ((aDetector = (TObjString*) iter.Next()))
1551         {
1552                 fCurrentDetector = aDetector->String();
1553
1554                 if (ContinueProcessing() == kFALSE) continue;
1555                 
1556                 if (first)
1557                 {
1558                   // only read QueryCDB when needed and only once
1559                   AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
1560                   if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
1561                   AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
1562                   if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
1563                   first = kFALSE;
1564                 }
1565
1566                 Log("SHUTTLE", Form("\t\t\t****** run %d - %s: START  ******",
1567                                                 GetCurrentRun(), aDetector->GetName()));
1568
1569                 for(Int_t iSys=0;iSys<3;iSys++) fFXSCalled[iSys]=kFALSE;
1570
1571                 Log(fCurrentDetector.Data(), "Process - Starting processing");
1572
1573                 Int_t pid = fork();
1574
1575                 if (pid < 0)
1576                 {
1577                         Log("SHUTTLE", "Process - ERROR: Forking failed");
1578                 }
1579                 else if (pid > 0)
1580                 {
1581                         // parent
1582                         Log("SHUTTLE", Form("Process - In parent process of %d - %s: Starting monitoring",
1583                                                         GetCurrentRun(), aDetector->GetName()));
1584
1585                         Long_t begin = time(0);
1586
1587                         int status; // to be used with waitpid, on purpose an int (not Int_t)!
1588                         while (waitpid(pid, &status, WNOHANG) == 0)
1589                         {
1590                                 Long_t expiredTime = time(0) - begin;
1591
1592                                 if (expiredTime > fConfig->GetPPTimeOut())
1593                                 {
1594                                         TString tmp;
1595                                         tmp.Form("Process - Process of %s time out. "
1596                                                         "Run time: %d seconds. Killing...",
1597                                                         fCurrentDetector.Data(), expiredTime);
1598                                         Log("SHUTTLE", tmp);
1599                                         Log(fCurrentDetector, tmp);
1600
1601                                         kill(pid, 9);
1602
1603                                         UpdateShuttleStatus(AliShuttleStatus::kPPTimeOut);
1604                                         hasError = kTRUE;
1605
1606                                         gSystem->Sleep(1000);
1607                                 }
1608                                 else
1609                                 {
1610                                         gSystem->Sleep(1000);
1611                                         
1612                                         TString checkStr;
1613                                         checkStr.Form("ps -o vsize --pid %d | tail -n 1", pid);
1614                                         FILE* pipe = gSystem->OpenPipe(checkStr, "r");
1615                                         if (!pipe)
1616                                         {
1617                                                 Log("SHUTTLE", Form("Process - Error: "
1618                                                         "Could not open pipe to %s", checkStr.Data()));
1619                                                 continue;
1620                                         }
1621                                                 
1622                                         char buffer[100];
1623                                         if (!fgets(buffer, 100, pipe))
1624                                         {
1625                                                 Log("SHUTTLE", "Process - Error: ps did not return anything");
1626                                                 gSystem->ClosePipe(pipe);
1627                                                 continue;
1628                                         }
1629                                         gSystem->ClosePipe(pipe);
1630                                         
1631                                         //Log("SHUTTLE", Form("ps returned %s", buffer));
1632                                         
1633                                         Int_t mem = 0;
1634                                         if ((sscanf(buffer, "%d\n", &mem) != 1) || !mem)
1635                                         {
1636                                                 Log("SHUTTLE", "Process - Error: Could not parse output of ps");
1637                                                 continue;
1638                                         }
1639                                         
1640                                         if (expiredTime % 60 == 0)
1641                                         {
1642                                                 Log("SHUTTLE", Form("Process - %s: Checking process. "
1643                                                         "Run time: %d seconds - Memory consumption: %d KB",
1644                                                         fCurrentDetector.Data(), expiredTime, mem));
1645                                                 SendAlive();
1646                                         }
1647                                         
1648                                         if (mem > fConfig->GetPPMaxMem())
1649                                         {
1650                                                 TString tmp;
1651                                                 tmp.Form("Process - Process exceeds maximum allowed memory "
1652                                                         "(%d KB > %d KB). Killing...",
1653                                                         mem, fConfig->GetPPMaxMem());
1654                                                 Log("SHUTTLE", tmp);
1655                                                 Log(fCurrentDetector, tmp);
1656         
1657                                                 kill(pid, 9);
1658         
1659                                                 UpdateShuttleStatus(AliShuttleStatus::kPPOutOfMemory);
1660                                                 hasError = kTRUE;
1661         
1662                                                 gSystem->Sleep(1000);
1663                                         }
1664                                 }
1665                         }
1666
1667                         Log("SHUTTLE", Form("Process - In parent process of %d - %s: Client has terminated.",
1668                                                                 GetCurrentRun(), aDetector->GetName()));
1669
1670                         if (WIFEXITED(status))
1671                         {
1672                                 Int_t returnCode = WEXITSTATUS(status);
1673
1674                                 Log("SHUTTLE", Form("Process - %s: the return code is %d", fCurrentDetector.Data(),
1675                                                                                 returnCode));
1676
1677                                 if (returnCode == 0) hasError = kTRUE;
1678                         }
1679                 }
1680                 else if (pid == 0)
1681                 {
1682                         // client
1683                         Log("SHUTTLE", Form("Process - In client process of %d - %s", GetCurrentRun(),
1684                                 aDetector->GetName()));
1685
1686                         Log("SHUTTLE", Form("Process - Redirecting output to %s log",fCurrentDetector.Data()));
1687
1688                         if ((freopen(GetLogFileName(fCurrentDetector), "a", stdout)) == 0)
1689                         {
1690                                 Log("SHUTTLE", "Process - Could not freopen stdout");
1691                         }
1692                         else
1693                         {
1694                                 fOutputRedirected = kTRUE;
1695                                 if ((dup2(fileno(stdout), fileno(stderr))) < 0)
1696                                         Log("SHUTTLE", "Process - Could not redirect stderr");
1697                                 
1698                         }
1699                         
1700                         TString wd = gSystem->WorkingDirectory();
1701                         TString tmpDir = Form("%s/%s_%d_process", GetShuttleTempDir(), 
1702                                 fCurrentDetector.Data(), GetCurrentRun());
1703                         
1704                         Int_t result = gSystem->GetPathInfo(tmpDir.Data(), 0, (Long64_t*) 0, 0, 0);
1705                         if (!result) // temp dir already exists!
1706                         {
1707                                 Log(fCurrentDetector.Data(), 
1708                                         Form("Process - %s dir already exists! Removing...", tmpDir.Data()));
1709                                 gSystem->Exec(Form("rm -rf %s",tmpDir.Data()));         
1710                         } 
1711                         
1712                         if (gSystem->mkdir(tmpDir.Data(), 1))
1713                         {
1714                                 Log(fCurrentDetector.Data(), "Process - could not make temp directory!!");
1715                                 gSystem->Exit(1);
1716                         }
1717                         
1718                         if (!gSystem->ChangeDirectory(tmpDir.Data())) 
1719                         {
1720                                 Log(fCurrentDetector.Data(), "Process - could not change directory!!");
1721                                 gSystem->Exit(1);                       
1722                         }
1723                         
1724                         Bool_t success = ProcessCurrentDetector();
1725                         
1726                         gSystem->ChangeDirectory(wd.Data());
1727                                                 
1728                         if (success) // Preprocessor finished successfully!
1729                         { 
1730                                 // remove temporary folder
1731                                 // temporary commented (JF)
1732                                 //gSystem->Exec(Form("rm -rf %s",tmpDir.Data()));
1733                                 
1734                                 // Update time_processed field in FXS DB
1735                                 if (UpdateTable() == kFALSE)
1736                                         Log("SHUTTLE", Form("Process - %s: Could not update FXS databases!", 
1737                                                         fCurrentDetector.Data()));
1738
1739                                 // Transfer the data from local storage to main storage (Grid)
1740                                 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
1741                                 if (StoreOCDB() == kFALSE)
1742                                 {
1743                                         Log("SHUTTLE", 
1744                                                 Form("\t\t\t****** run %d - %s: STORAGE ERROR ******",
1745                                                         GetCurrentRun(), aDetector->GetName()));
1746                                         UpdateShuttleStatus(AliShuttleStatus::kStoreError);
1747                                         success = kFALSE;
1748                                 } else {
1749                                         Log("SHUTTLE", 
1750                                                 Form("\t\t\t****** run %d - %s: DONE ******",
1751                                                         GetCurrentRun(), aDetector->GetName()));
1752                                         UpdateShuttleStatus(AliShuttleStatus::kDone);
1753                                         UpdateShuttleLogbook(fCurrentDetector, "DONE");
1754                                 }
1755                         } else 
1756                         {
1757                                 Log("SHUTTLE", 
1758                                         Form("\t\t\t****** run %d - %s: PP ERROR ******",
1759                                                 GetCurrentRun(), aDetector->GetName()));
1760                         }
1761
1762                         for (UInt_t iSys=0; iSys<3; iSys++)
1763                         {
1764                                 if (fFXSCalled[iSys]) fFXSlist[iSys].Clear();
1765                         }
1766
1767                         Log("SHUTTLE", Form("Process - Client process of %d - %s is exiting now with %d.",
1768                                                         GetCurrentRun(), aDetector->GetName(), success));
1769
1770                         // the client exits here
1771                         gSystem->Exit(success);
1772
1773                         AliError("We should never get here!!!");
1774                 }
1775         }
1776
1777         Log("SHUTTLE", Form("\t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^*",
1778                                                         GetCurrentRun()));
1779
1780         //check if shuttle is done for this run, if so update logbook
1781         TObjArray checkEntryArray;
1782         checkEntryArray.SetOwner(1);
1783         TString whereClause = Form("where run=%d", GetCurrentRun());
1784         if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || 
1785                         checkEntryArray.GetEntries() == 0) {
1786                 Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!",
1787                                                 GetCurrentRun()));
1788                 return hasError == kFALSE;
1789         }
1790
1791         AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*>
1792                                                 (checkEntryArray.At(0));
1793
1794         if (checkEntry)
1795         {
1796                 if (checkEntry->IsDone())
1797                 {
1798                         Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook");
1799                         UpdateShuttleLogbook("shuttle_done");
1800                 }
1801                 else
1802                 {
1803                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1804                         {
1805                                 if (checkEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
1806                                 {
1807                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
1808                                                         checkEntry->GetRun(), GetDetName(iDet)));
1809                                         fFirstUnprocessed[iDet] = kFALSE;
1810                                 }
1811                         }
1812                 }
1813         }
1814
1815         fLogbookEntry = 0;
1816
1817         return hasError == kFALSE;
1818 }
1819
1820 //______________________________________________________________________________________________
1821 Bool_t AliShuttle::ProcessCurrentDetector()
1822 {
1823         //
1824         // Makes data retrieval just for a specific detector (fCurrentDetector).
1825         // Threre should be a configuration for this detector.
1826
1827         Log("SHUTTLE", Form("ProcessCurrentDetector - Retrieving values for %s, run %d", 
1828                                                 fCurrentDetector.Data(), GetCurrentRun()));
1829
1830         TString wd = gSystem->WorkingDirectory();
1831         
1832         if (!CleanReferenceStorage(fCurrentDetector.Data()))
1833                 return kFALSE;
1834         
1835         gSystem->ChangeDirectory(wd.Data());
1836         
1837         TMap* dcsMap = new TMap();
1838
1839         // call preprocessor
1840         AliPreprocessor* aPreprocessor =
1841                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
1842
1843         aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
1844
1845         Bool_t processDCS = aPreprocessor->ProcessDCS();
1846
1847         if (!processDCS)
1848         {
1849                 Log(fCurrentDetector, "ProcessCurrentDetector -"
1850                         " The preprocessor requested to skip the retrieval of DCS values");
1851         }
1852         else if (fTestMode & kSkipDCS)
1853         {
1854                 Log(fCurrentDetector, "ProcessCurrentDetector - In TESTMODE: Skipping DCS processing");
1855         } 
1856         else if (fTestMode & kErrorDCS)
1857         {
1858                 Log(fCurrentDetector, "ProcessCurrentDetector - In TESTMODE: Simulating DCS error");
1859                 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
1860                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1861                 delete dcsMap;
1862                 return kFALSE;
1863         } else {
1864
1865                 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
1866
1867                 // Query DCS archive
1868                 Int_t nServers = fConfig->GetNServers(fCurrentDetector);
1869                 
1870                 for (int iServ=0; iServ<nServers; iServ++)
1871                 {
1872                 
1873                         TString host(fConfig->GetDCSHost(fCurrentDetector, iServ));
1874                         Int_t port = fConfig->GetDCSPort(fCurrentDetector, iServ);
1875                         Int_t multiSplit = fConfig->GetMultiSplit(fCurrentDetector, iServ);
1876
1877                         Log(fCurrentDetector, Form("ProcessCurrentDetector -"
1878                                         " Querying DCS Amanda server %s:%d (%d of %d)", 
1879                                         host.Data(), port, iServ+1, nServers));
1880                         
1881                         TMap* aliasMap = 0;
1882                         TMap* dpMap = 0;
1883         
1884                         if (fConfig->GetDCSAliases(fCurrentDetector, iServ)->GetEntries() > 0)
1885                         {
1886                                 aliasMap = GetValueSet(host, port, 
1887                                                 fConfig->GetDCSAliases(fCurrentDetector, iServ), 
1888                                                 kAlias, multiSplit);
1889                                 if (!aliasMap)
1890                                 {
1891                                         Log(fCurrentDetector, 
1892                                                 Form("ProcessCurrentDetector -"
1893                                                         " Error retrieving DCS aliases from server %s."
1894                                                         " Sending mail to DCS experts!", host.Data()));
1895                                         UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1896                                         
1897                                         //if (!SendMailToDCS())
1898                                         //      Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!"));
1899
1900                                         delete dcsMap;
1901                                         return kFALSE;
1902                                 }
1903                         }
1904                         
1905                         if (fConfig->GetDCSDataPoints(fCurrentDetector, iServ)->GetEntries() > 0)
1906                         {
1907                                 dpMap = GetValueSet(host, port, 
1908                                                 fConfig->GetDCSDataPoints(fCurrentDetector, iServ), 
1909                                                 kDP, multiSplit);
1910                                 if (!dpMap)
1911                                 {
1912                                         Log(fCurrentDetector, 
1913                                                 Form("ProcessCurrentDetector -"
1914                                                         " Error retrieving DCS data points from server %s."
1915                                                         " Sending mail to DCS experts!", host.Data()));
1916                                         UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1917                                         
1918                                         //if (!SendMailToDCS())
1919                                         //      Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!"));
1920                                         
1921                                         if (aliasMap) delete aliasMap;
1922                                         delete dcsMap;
1923                                         return kFALSE;
1924                                 }                               
1925                         }
1926                         
1927                         // merge aliasMap and dpMap into dcsMap
1928                         if(aliasMap) {
1929                                 TIter iter(aliasMap);
1930                                 TObjString* key = 0;
1931                                 while ((key = (TObjString*) iter.Next()))
1932                                         dcsMap->Add(key, aliasMap->GetValue(key->String()));
1933                                 
1934                                 aliasMap->SetOwner(kFALSE);
1935                                 delete aliasMap;
1936                         }       
1937                         
1938                         if(dpMap) {
1939                                 TIter iter(dpMap);
1940                                 TObjString* key = 0;
1941                                 while ((key = (TObjString*) iter.Next()))
1942                                         dcsMap->Add(key, dpMap->GetValue(key->String()));
1943                                 
1944                                 dpMap->SetOwner(kFALSE);
1945                                 delete dpMap;
1946                         }
1947                 }
1948         }
1949         
1950         // save map into file, to help debugging in case of preprocessor error
1951         TFile* f = TFile::Open("DCSMap.root","recreate");
1952         f->cd();
1953         dcsMap->Write("DCSMap", TObject::kSingleKey);
1954         f->Close();
1955         delete f;
1956         
1957         // DCS Archive DB processing successful. Call Preprocessor!
1958         UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
1959
1960         UInt_t returnValue = aPreprocessor->Process(dcsMap);
1961
1962         if (returnValue > 0) // Preprocessor error!
1963         {
1964                 Log(fCurrentDetector, Form("ProcessCurrentDetector - "
1965                                 "Preprocessor failed. Process returned %d.", returnValue));
1966                 UpdateShuttleStatus(AliShuttleStatus::kPPError);
1967                 dcsMap->DeleteAll();
1968                 delete dcsMap;
1969                 return kFALSE;
1970         }
1971         
1972         // preprocessor ok!
1973         UpdateShuttleStatus(AliShuttleStatus::kPPDone);
1974         Log(fCurrentDetector, Form("ProcessCurrentDetector - %s preprocessor returned success",
1975                                 fCurrentDetector.Data()));
1976
1977         dcsMap->DeleteAll();
1978         delete dcsMap;
1979
1980         return kTRUE;
1981 }
1982
1983 //______________________________________________________________________________________________
1984 void AliShuttle::CountOpenRuns()
1985 {
1986         // Query DAQ's Shuttle logbook and sends the number of open runs to ML
1987         
1988         // check connection, in case connect
1989         if (!Connect(3)) 
1990                 return;
1991
1992         TString sqlQuery;
1993         sqlQuery = Form("select count(*) from %s where shuttle_done=0", fConfig->GetShuttlelbTable());
1994         
1995         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
1996         if (!aResult) {
1997                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
1998                 return;
1999         }
2000
2001         AliDebug(2,Form("Query = %s", sqlQuery.Data()));
2002         
2003         if (aResult->GetRowCount() == 0) {
2004                 AliError(Form("No result for query %s received", sqlQuery.Data()));
2005                 return;
2006         }
2007
2008         if (aResult->GetFieldCount() != 1) {
2009                 AliError(Form("Invalid field count for query %s received", sqlQuery.Data()));
2010                 return;
2011         }
2012
2013         TSQLRow* aRow = aResult->Next();
2014         if (!aRow) {
2015                 AliError(Form("Could not receive result of query %s", sqlQuery.Data()));
2016                 return;
2017         }
2018         
2019         TString result(aRow->GetField(0), aRow->GetFieldLength(0));
2020         Int_t count = result.Atoi();
2021         
2022         Log("SHUTTLE", Form("%d unprocessed runs", count));
2023         
2024         delete aRow;
2025         delete aResult;
2026
2027         TMonaLisaValue mlStatus("SHUTTLE_openruns", count);
2028
2029         TList mlList;
2030         mlList.Add(&mlStatus);
2031
2032         fMonaLisa->SendParameters(&mlList, "__PROCESSINGINFO__");
2033 }
2034
2035 //______________________________________________________________________________________________
2036 Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
2037                 TObjArray& entries)
2038 {
2039         // Query DAQ's Shuttle logbook and fills detector status object.
2040         // Call QueryRunParameters to query DAQ logbook for run parameters.
2041         //
2042
2043         entries.SetOwner(1);
2044
2045         // check connection, in case connect
2046         if (!Connect(3)) return kFALSE;
2047
2048         TString sqlQuery;
2049         sqlQuery = Form("select * from %s %s order by run", fConfig->GetShuttlelbTable(), whereClause);
2050
2051         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
2052         if (!aResult) {
2053                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
2054                 return kFALSE;
2055         }
2056
2057         AliDebug(2,Form("Query = %s", sqlQuery.Data()));
2058
2059         if(aResult->GetRowCount() == 0) {
2060                 Log("SHUTTLE", "No entries in Shuttle Logbook match request");
2061                 delete aResult;
2062                 return kTRUE;
2063         }
2064
2065         // TODO Check field count!
2066         const UInt_t nCols = 23;
2067         if (aResult->GetFieldCount() != (Int_t) nCols) {
2068                 Log("SHUTTLE", "Invalid SQL result field number!");
2069                 delete aResult;
2070                 return kFALSE;
2071         }
2072
2073         TSQLRow* aRow;
2074         while ((aRow = aResult->Next())) {
2075                 TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
2076                 Int_t run = runString.Atoi();
2077
2078                 AliShuttleLogbookEntry *entry = QueryRunParameters(run);
2079                 if (!entry)
2080                         continue;
2081
2082                 // loop on detectors
2083                 for(UInt_t ii = 0; ii < nCols; ii++)
2084                         entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii));
2085
2086                 entries.AddLast(entry);
2087                 delete aRow;
2088         }
2089
2090         delete aResult;
2091         return kTRUE;
2092 }
2093
2094 //______________________________________________________________________________________________
2095 AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
2096 {
2097         //
2098         // Retrieve run parameters written in the DAQ logbook and sets them into AliShuttleLogbookEntry object
2099         //
2100
2101         // check connection, in case connect
2102         if (!Connect(3))
2103                 return 0;
2104
2105         TString sqlQuery;
2106         sqlQuery.Form("select * from %s where run=%d", fConfig->GetDAQlbTable(), run);
2107
2108         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
2109         if (!aResult) {
2110                 Log("SHUTTLE", Form("Can't execute query <%s>!", sqlQuery.Data()));
2111                 return 0;
2112         }
2113
2114         if (aResult->GetRowCount() == 0) {
2115                 Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run));
2116                 delete aResult;
2117                 return 0;
2118         }
2119
2120         if (aResult->GetRowCount() > 1) {
2121                 Log("SHUTTLE", Form("QueryRunParameters - UNEXPECTED: "
2122                                 "more than one entry in DAQ Logbook for run %d!", run));
2123                 delete aResult;
2124                 return 0;
2125         }
2126
2127         TSQLRow* aRow = aResult->Next();
2128         if (!aRow)
2129         {
2130                 Log("SHUTTLE", Form("QueryRunParameters - Could not retrieve row for run %d. Skipping", run));
2131                 delete aResult;
2132                 return 0;
2133         }
2134
2135         AliShuttleLogbookEntry* entry = new AliShuttleLogbookEntry(run);
2136
2137         for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
2138                 entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
2139
2140         UInt_t startTime = entry->GetStartTime();
2141         UInt_t endTime = entry->GetEndTime();
2142
2143 //      if (!startTime || !endTime || startTime > endTime) 
2144 //      {
2145 //              Log("SHUTTLE",
2146 //                      Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d. Skipping!",
2147 //                              run, startTime, endTime));              
2148 //              
2149 //              Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
2150 //              fLogbookEntry = entry;  
2151 //              if (!UpdateShuttleLogbook("shuttle_done"))
2152 //              {
2153 //                      AliError(Form("Could not update logbook for run %d !", run));
2154 //              }
2155 //              fLogbookEntry = 0;
2156 //                              
2157 //              delete entry;
2158 //              delete aRow;
2159 //              delete aResult;
2160 //              return 0;
2161 //      }
2162
2163         if (!startTime) 
2164         {
2165                 Log("SHUTTLE",
2166                         Form("QueryRunParameters - Invalid parameters for Run %d: " 
2167                                 "startTime = %d, endTime = %d. Skipping!",
2168                                         run, startTime, endTime));              
2169                 
2170                 Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
2171                 fLogbookEntry = entry;  
2172                 if (!UpdateShuttleLogbook("shuttle_ignored"))
2173                 {
2174                         AliError(Form("Could not update logbook for run %d !", run));
2175                 }
2176                 fLogbookEntry = 0;
2177                                 
2178                 delete entry;
2179                 delete aRow;
2180                 delete aResult;
2181                 return 0;
2182         }
2183         
2184         if (startTime && !endTime) 
2185         {
2186                 // TODO Here we don't mark SHUTTLE done, because this may mean 
2187                 //the run is still ongoing!!            
2188                 Log("SHUTTLE",
2189                         Form("QueryRunParameters - Invalid parameters for Run %d: "
2190                              "startTime = %d, endTime = %d. Skipping (Shuttle won't be marked as DONE)!",
2191                                         run, startTime, endTime));              
2192                 
2193                 //Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
2194                 //fLogbookEntry = entry;        
2195                 //if (!UpdateShuttleLogbook("shuttle_done"))
2196                 //{
2197                 //      AliError(Form("Could not update logbook for run %d !", run));
2198                 //}
2199                 //fLogbookEntry = 0;
2200                                 
2201                 delete entry;
2202                 delete aRow;
2203                 delete aResult;
2204                 return 0;
2205         }
2206                         
2207         if (startTime && endTime && (startTime > endTime)) 
2208         {
2209                 Log("SHUTTLE",
2210                         Form("QueryRunParameters - Invalid parameters for Run %d: "
2211                                 "startTime = %d, endTime = %d. Skipping!",
2212                                         run, startTime, endTime));              
2213                 
2214                 Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
2215                 fLogbookEntry = entry;  
2216                 if (!UpdateShuttleLogbook("shuttle_ignored"))
2217                 {
2218                         AliError(Form("Could not update logbook for run %d !", run));
2219                 }
2220                 fLogbookEntry = 0;
2221                                 
2222                 delete entry;
2223                 delete aRow;
2224                 delete aResult;
2225                 return 0;
2226         }
2227                         
2228         TString totEventsStr = entry->GetRunParameter("totalEvents");  
2229         Int_t totEvents = totEventsStr.Atoi();
2230         if (totEvents < 1) 
2231         {
2232                 Log("SHUTTLE",
2233                         Form("QueryRunParameters - Run %d has 0 events - Skipping!", run));             
2234                 
2235                 Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));           
2236                 fLogbookEntry = entry;  
2237                 if (!UpdateShuttleLogbook("shuttle_ignored"))
2238                 {
2239                         AliError(Form("Could not update logbook for run %d !", run));
2240                 }
2241                 fLogbookEntry = 0;
2242                                 
2243                 delete entry;
2244                 delete aRow;
2245                 delete aResult;
2246                 return 0;
2247         }
2248
2249         delete aRow;
2250         delete aResult;
2251
2252         return entry;
2253 }
2254
2255 //______________________________________________________________________________________________
2256 TMap* AliShuttle::GetValueSet(const char* host, Int_t port, const TSeqCollection* entries,
2257                               DCSType type, Int_t multiSplit)
2258 {
2259         // Retrieve all "entry" data points from the DCS server
2260         // host, port: TSocket connection parameters
2261         // entries: list of name of the alias or data point
2262         // type: kAlias or kDP
2263         // returns TMap of values, 0 when failure
2264         
2265         AliDCSClient client(host, port, fTimeout, fRetries, multiSplit);
2266
2267         TMap* result = 0;
2268         if (type == kAlias)
2269         {
2270                 result = client.GetAliasValues(entries, GetCurrentStartTime(), 
2271                         GetCurrentEndTime());
2272         } 
2273         else if (type == kDP)
2274         {
2275                 result = client.GetDPValues(entries, GetCurrentStartTime(), 
2276                         GetCurrentEndTime());
2277         }
2278
2279         if (result == 0)
2280         {
2281                 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get entries! Reason: %s",
2282                         client.GetErrorString(client.GetResultErrorCode())));
2283                 if (client.GetResultErrorCode() == AliDCSClient::fgkServerError)        
2284                         Log(fCurrentDetector.Data(), Form("GetValueSet - Server error code: %s",
2285                                 client.GetServerError().Data()));
2286
2287                 return 0;
2288         }
2289                 
2290         return result;
2291 }
2292
2293 //______________________________________________________________________________________________
2294 const char* AliShuttle::GetFile(Int_t system, const char* detector,
2295                 const char* id, const char* source)
2296 {
2297         // Get calibration file from file exchange servers
2298         // First queris the FXS database for the file name, using the run, detector, id and source info
2299         // then calls RetrieveFile(filename) for actual copy to local disk
2300         // run: current run being processed (given by Logbook entry fLogbookEntry)
2301         // detector: the Preprocessor name
2302         // id: provided as a parameter by the Preprocessor
2303         // source: provided by the Preprocessor through GetFileSources function
2304
2305         // check if test mode should simulate a FXS error
2306         if (fTestMode & kErrorFXSFiles)
2307         {
2308                 Log(detector, Form("GetFile - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2309                 return 0;
2310         }
2311         
2312         // check connection, in case connect
2313         if (!Connect(system))
2314         {
2315                 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
2316                 return 0;
2317         }
2318
2319         // Query preparation
2320         TString sourceName(source);
2321         Int_t nFields = 3;
2322         TString sqlQueryStart = Form("select filePath,size,fileChecksum from %s where",
2323                                                                 fConfig->GetFXSdbTable(system));
2324         TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
2325                                                                 GetCurrentRun(), detector, id);
2326
2327         if (system == kDAQ)
2328         {
2329                 whereClause += Form(" and DAQsource=\"%s\"", source);
2330         }
2331         else if (system == kDCS)
2332         {
2333                 sourceName="none";
2334         }
2335         else if (system == kHLT)
2336         {
2337                 whereClause += Form(" and DDLnumbers=\"%s\"", source);
2338                 nFields = 3;
2339         }
2340
2341         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2342
2343         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2344
2345         // Query execution
2346         TSQLResult* aResult = 0;
2347         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2348         if (!aResult) {
2349                 Log(detector, Form("GetFileName - Can't execute SQL query to %s database for: id = %s, source = %s",
2350                                 GetSystemName(system), id, sourceName.Data()));
2351                 return 0;
2352         }
2353
2354         if(aResult->GetRowCount() == 0)
2355         {
2356                 Log(detector,
2357                         Form("GetFileName - No entry in %s FXS db for: id = %s, source = %s",
2358                                 GetSystemName(system), id, sourceName.Data()));
2359                 delete aResult;
2360                 return 0;
2361         }
2362
2363         if (aResult->GetRowCount() > 1) {
2364                 Log(detector,
2365                         Form("GetFileName - More than one entry in %s FXS db for: id = %s, source = %s",
2366                                 GetSystemName(system), id, sourceName.Data()));
2367                 delete aResult;
2368                 return 0;
2369         }
2370
2371         if (aResult->GetFieldCount() != nFields) {
2372                 Log(detector,
2373                         Form("GetFileName - Wrong field count in %s FXS db for: id = %s, source = %s",
2374                                 GetSystemName(system), id, sourceName.Data()));
2375                 delete aResult;
2376                 return 0;
2377         }
2378
2379         TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
2380
2381         if (!aRow){
2382                 Log(detector, Form("GetFileName - Empty set result in %s FXS db from query: id = %s, source = %s",
2383                                 GetSystemName(system), id, sourceName.Data()));
2384                 delete aResult;
2385                 return 0;
2386         }
2387
2388         TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
2389         TString fileSize(aRow->GetField(1), aRow->GetFieldLength(1));
2390         TString fileChecksum(aRow->GetField(2), aRow->GetFieldLength(2));
2391
2392         delete aResult;
2393         delete aRow;
2394
2395         AliDebug(2, Form("filePath = %s; size = %s, fileChecksum = %s",
2396                                 filePath.Data(), fileSize.Data(), fileChecksum.Data()));
2397
2398         // retrieved file is renamed to make it unique
2399         TString localFileName = Form("%s/%s_%d_process/%s_%s_%d_%s_%s.shuttle",
2400                                         GetShuttleTempDir(), detector, GetCurrentRun(),
2401                                         GetSystemName(system), detector, GetCurrentRun(), 
2402                                         id, sourceName.Data());
2403
2404
2405         // file retrieval from FXS
2406         UInt_t nRetries = 0;
2407         UInt_t maxRetries = 3;
2408         Bool_t result = kFALSE;
2409
2410         // copy!! if successful TSystem::Exec returns 0
2411         while(nRetries++ < maxRetries) {
2412                 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
2413                 result = RetrieveFile(system, filePath.Data(), localFileName.Data());
2414                 if(!result)
2415                 {
2416                         Log(detector, Form("GetFileName - Copy of file %s from %s FXS failed",
2417                                         filePath.Data(), GetSystemName(system)));
2418                         continue;
2419                 } 
2420
2421                 if (fileChecksum.Length()>0)
2422                 {
2423                         // compare md5sum of local file with the one stored in the FXS DB
2424                         Int_t md5Comp = gSystem->Exec(Form("md5sum %s |grep %s 2>&1 > /dev/null",
2425                                                 localFileName.Data(), fileChecksum.Data()));
2426
2427                         if (md5Comp != 0)
2428                         {
2429                                 Log(detector, Form("GetFileName - md5sum of file %s does not match with local copy!",
2430                                                         filePath.Data()));
2431                                 result = kFALSE;
2432                                 continue;
2433                         }
2434                 } else {
2435                         Log(fCurrentDetector, Form("GetFile - md5sum of file %s not set in %s database, skipping comparison",
2436                                                         filePath.Data(), GetSystemName(system)));
2437                 }
2438                 if (result) break;
2439         }
2440
2441         if(!result) return 0;
2442
2443         fFXSCalled[system]=kTRUE;
2444         TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, sourceName.Data()));
2445         fFXSlist[system].Add(fileParams);
2446
2447         static TString staticLocalFileName;
2448         staticLocalFileName.Form("%s", localFileName.Data());
2449         
2450         Log(fCurrentDetector, Form("GetFile - Retrieved file with id %s and "
2451                         "source %s from %s to %s", id, source, 
2452                         GetSystemName(system), localFileName.Data()));
2453                         
2454         return staticLocalFileName.Data();
2455 }
2456
2457 //______________________________________________________________________________________________
2458 Bool_t AliShuttle::RetrieveFile(UInt_t system, const char* fxsFileName, const char* localFileName)
2459 {
2460         //
2461         // Copies file from FXS to local Shuttle machine
2462         //
2463
2464         // check temp directory: trying to cd to temp; if it does not exist, create it
2465         AliDebug(2, Form("Copy file %s from %s FXS into %s",
2466                         GetSystemName(system), fxsFileName, localFileName));
2467                         
2468         TString tmpDir(localFileName);
2469         
2470         tmpDir = tmpDir(0,tmpDir.Last('/'));
2471
2472         Int_t noDir = gSystem->GetPathInfo(tmpDir.Data(), 0, (Long64_t*) 0, 0, 0);
2473         if (noDir) // temp dir does not exists!
2474         {
2475                 if (gSystem->mkdir(tmpDir.Data(), 1))
2476                 {
2477                         Log(fCurrentDetector.Data(), "RetrieveFile - could not make temp directory!!");
2478                         return kFALSE;
2479                 }
2480         }
2481
2482         TString baseFXSFolder;
2483         if (system == kDAQ)
2484         {
2485                 baseFXSFolder = "FES/";
2486         }
2487         else if (system == kDCS)
2488         {
2489                 baseFXSFolder = "";
2490         }
2491         else if (system == kHLT)
2492         {
2493                 baseFXSFolder = "/opt/FXS/";
2494         }
2495
2496
2497         TString command = Form("scp -oPort=%d -2 %s@%s:%s%s %s",
2498                 fConfig->GetFXSPort(system),
2499                 fConfig->GetFXSUser(system),
2500                 fConfig->GetFXSHost(system),
2501                 baseFXSFolder.Data(),
2502                 fxsFileName,
2503                 localFileName);
2504
2505         AliDebug(2, Form("%s",command.Data()));
2506
2507         Bool_t result = (gSystem->Exec(command.Data()) == 0);
2508
2509         return result;
2510 }
2511
2512 //______________________________________________________________________________________________
2513 TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
2514 {
2515         //
2516         // Get sources producing the condition file Id from file exchange servers
2517         // if id is NULL all sources are returned (distinct)
2518         //
2519
2520         if (id)
2521         {
2522                 Log(detector, Form("GetFileSources - Querying %s FXS for files with id %s produced by %s", GetSystemName(system), id, detector));
2523         } else {
2524                 Log(detector, Form("GetFileSources - Querying %s FXS for files produced by %s", GetSystemName(system), detector));
2525         }
2526         
2527         // check if test mode should simulate a FXS error
2528         if (fTestMode & kErrorFXSSources)
2529         {
2530                 Log(detector, Form("GetFileSources - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2531                 return 0;
2532         }
2533
2534         if (system == kDCS)
2535         {
2536                 Log(detector, "GetFileSources - WARNING: DCS system has only one source of data!");
2537                 TList *list = new TList();
2538                 list->SetOwner(1);
2539                 list->Add(new TObjString(" "));
2540                 return list;
2541         }
2542
2543         // check connection, in case connect
2544         if (!Connect(system))
2545         {
2546                 Log(detector, Form("GetFileSources - Couldn't connect to %s FXS database", GetSystemName(system)));
2547                 return NULL;
2548         }
2549
2550         TString sourceName = 0;
2551         if (system == kDAQ)
2552         {
2553                 sourceName = "DAQsource";
2554         } else if (system == kHLT)
2555         {
2556                 sourceName = "DDLnumbers";
2557         }
2558
2559         TString sqlQueryStart = Form("select distinct %s from %s where", sourceName.Data(), fConfig->GetFXSdbTable(system));
2560         TString whereClause = Form("run=%d and detector=\"%s\"",
2561                                 GetCurrentRun(), detector);
2562         if (id)
2563                 whereClause += Form(" and fileId=\"%s\"", id);
2564         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2565
2566         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2567
2568         // Query execution
2569         TSQLResult* aResult;
2570         aResult = fServer[system]->Query(sqlQuery);
2571         if (!aResult) {
2572                 Log(detector, Form("GetFileSources - Can't execute SQL query to %s database for id: %s",
2573                                 GetSystemName(system), id));
2574                 return 0;
2575         }
2576
2577         TList *list = new TList();
2578         list->SetOwner(1);
2579         
2580         if (aResult->GetRowCount() == 0)
2581         {
2582                 Log(detector,
2583                         Form("GetFileSources - No entry in %s FXS table for id: %s", GetSystemName(system), id));
2584                 delete aResult;
2585                 return list;
2586         }
2587
2588         Log(detector, Form("GetFileSources - Found %d sources", aResult->GetRowCount()));
2589
2590         TSQLRow* aRow;
2591         while ((aRow = aResult->Next()))
2592         {
2593
2594                 TString source(aRow->GetField(0), aRow->GetFieldLength(0));
2595                 AliDebug(2, Form("%s = %s", sourceName.Data(), source.Data()));
2596                 list->Add(new TObjString(source));
2597                 delete aRow;
2598         }
2599
2600         delete aResult;
2601
2602         return list;
2603 }
2604
2605 //______________________________________________________________________________________________
2606 TList* AliShuttle::GetFileIDs(Int_t system, const char* detector, const char* source)
2607 {
2608         //
2609         // Get all ids of condition files produced by a given source from file exchange servers
2610         //
2611         
2612         Log(detector, Form("GetFileIDs - Retrieving ids with source %s with %s", source, GetSystemName(system)));
2613
2614         // check if test mode should simulate a FXS error
2615         if (fTestMode & kErrorFXSSources)
2616         {
2617                 Log(detector, Form("GetFileIDs - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2618                 return 0;
2619         }
2620
2621         // check connection, in case connect
2622         if (!Connect(system))
2623         {
2624                 Log(detector, Form("GetFileIDs - Couldn't connect to %s FXS database", GetSystemName(system)));
2625                 return NULL;
2626         }
2627
2628         TString sourceName = 0;
2629         if (system == kDAQ)
2630         {
2631                 sourceName = "DAQsource";
2632         } else if (system == kHLT)
2633         {
2634                 sourceName = "DDLnumbers";
2635         }
2636
2637         TString sqlQueryStart = Form("select fileId from %s where", fConfig->GetFXSdbTable(system));
2638         TString whereClause = Form("run=%d and detector=\"%s\"",
2639                                 GetCurrentRun(), detector);
2640         if (sourceName.Length() > 0 && source)
2641                 whereClause += Form(" and %s=\"%s\"", sourceName.Data(), source);
2642         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2643
2644         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2645
2646         // Query execution
2647         TSQLResult* aResult;
2648         aResult = fServer[system]->Query(sqlQuery);
2649         if (!aResult) {
2650                 Log(detector, Form("GetFileIDs - Can't execute SQL query to %s database for source: %s",
2651                                 GetSystemName(system), source));
2652                 return 0;
2653         }
2654
2655         TList *list = new TList();
2656         list->SetOwner(1);
2657         
2658         if (aResult->GetRowCount() == 0)
2659         {
2660                 Log(detector,
2661                         Form("GetFileIDs - No entry in %s FXS table for source: %s", GetSystemName(system), source));
2662                 delete aResult;
2663                 return list;
2664         }
2665
2666         Log(detector, Form("GetFileIDs - Found %d ids", aResult->GetRowCount()));
2667
2668         TSQLRow* aRow;
2669
2670         while ((aRow = aResult->Next()))
2671         {
2672
2673                 TString id(aRow->GetField(0), aRow->GetFieldLength(0));
2674                 AliDebug(2, Form("fileId = %s", id.Data()));
2675                 list->Add(new TObjString(id));
2676                 delete aRow;
2677         }
2678
2679         delete aResult;
2680
2681         return list;
2682 }
2683
2684 //______________________________________________________________________________________________
2685 Bool_t AliShuttle::Connect(Int_t system)
2686 {
2687         // Connect to MySQL Server of the system's FXS MySQL databases
2688         // DAQ Logbook, Shuttle Logbook and DAQ FXS db are on the same host
2689         //
2690
2691         // check connection: if already connected return
2692         if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
2693
2694         TString dbHost, dbUser, dbPass, dbName;
2695
2696         if (system < 3) // FXS db servers
2697         {
2698                 dbHost = Form("mysql://%s:%d", fConfig->GetFXSdbHost(system), fConfig->GetFXSdbPort(system));
2699                 dbUser = fConfig->GetFXSdbUser(system);
2700                 dbPass = fConfig->GetFXSdbPass(system);
2701                 dbName =   fConfig->GetFXSdbName(system);
2702         } else { // Run & Shuttle logbook servers
2703         // TODO Will the Shuttle logbook server be the same as the Run logbook server ???
2704                 dbHost = Form("mysql://%s:%d", fConfig->GetDAQlbHost(), fConfig->GetDAQlbPort());
2705                 dbUser = fConfig->GetDAQlbUser();
2706                 dbPass = fConfig->GetDAQlbPass();
2707                 dbName =   fConfig->GetDAQlbDB();
2708         }
2709
2710         fServer[system] = TSQLServer::Connect(dbHost.Data(), dbUser.Data(), dbPass.Data());
2711         if (!fServer[system] || !fServer[system]->IsConnected()) {
2712                 if(system < 3)
2713                 {
2714                 AliError(Form("Can't establish connection to FXS database for %s",
2715                                         AliShuttleInterface::GetSystemName(system)));
2716                 } else {
2717                 AliError("Can't establish connection to Run logbook.");
2718                 }
2719                 if(fServer[system]) delete fServer[system];
2720                 return kFALSE;
2721         }
2722
2723         // Get tables
2724         TSQLResult* aResult=0;
2725         switch(system){
2726                 case kDAQ:
2727                         aResult = fServer[kDAQ]->GetTables(dbName.Data());
2728                         break;
2729                 case kDCS:
2730                         aResult = fServer[kDCS]->GetTables(dbName.Data());
2731                         break;
2732                 case kHLT:
2733                         aResult = fServer[kHLT]->GetTables(dbName.Data());
2734                         break;
2735                 default:
2736                         aResult = fServer[3]->GetTables(dbName.Data());
2737                         break;
2738         }
2739
2740         delete aResult;
2741         return kTRUE;
2742 }
2743
2744 //______________________________________________________________________________________________
2745 Bool_t AliShuttle::UpdateTable()
2746 {
2747         //
2748         // Update FXS table filling time_processed field in all rows corresponding to current run and detector
2749         //
2750
2751         Bool_t result = kTRUE;
2752
2753         for (UInt_t system=0; system<3; system++)
2754         {
2755                 if(!fFXSCalled[system]) continue;
2756
2757                 // check connection, in case connect
2758                 if (!Connect(system))
2759                 {
2760                         Log(fCurrentDetector, Form("UpdateTable - Couldn't connect to %s FXS database", GetSystemName(system)));
2761                         result = kFALSE;
2762                         continue;
2763                 }
2764
2765                 TTimeStamp now; // now
2766
2767                 // Loop on FXS list entries
2768                 TIter iter(&fFXSlist[system]);
2769                 TObjString *aFXSentry=0;
2770                 while ((aFXSentry = dynamic_cast<TObjString*> (iter.Next())))
2771                 {
2772                         TString aFXSentrystr = aFXSentry->String();
2773                         TObjArray *aFXSarray = aFXSentrystr.Tokenize("#!?!#");
2774                         if (!aFXSarray || aFXSarray->GetEntries() != 2 )
2775                         {
2776                                 Log(fCurrentDetector, Form("UpdateTable - error updating %s FXS entry. Check string: <%s>",
2777                                         GetSystemName(system), aFXSentrystr.Data()));
2778                                 if(aFXSarray) delete aFXSarray;
2779                                 result = kFALSE;
2780                                 continue;
2781                         }
2782                         const char* fileId = ((TObjString*) aFXSarray->At(0))->GetName();
2783                         const char* source = ((TObjString*) aFXSarray->At(1))->GetName();
2784
2785                         TString whereClause;
2786                         if (system == kDAQ)
2787                         {
2788                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
2789                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
2790                         }
2791                         else if (system == kDCS)
2792                         {
2793                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\";",
2794                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId);
2795                         }
2796                         else if (system == kHLT)
2797                         {
2798                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\";",
2799                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
2800                         }
2801
2802                         delete aFXSarray;
2803
2804                         TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
2805                                                                 now.GetSec(), whereClause.Data());
2806
2807                         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2808
2809                         // Query execution
2810                         TSQLResult* aResult;
2811                         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2812                         if (!aResult)
2813                         {
2814                                 Log(fCurrentDetector, Form("UpdateTable - %s db: can't execute SQL query <%s>",
2815                                                                 GetSystemName(system), sqlQuery.Data()));
2816                                 result = kFALSE;
2817                                 continue;
2818                         }
2819                         delete aResult;
2820                 }
2821         }
2822
2823         return result;
2824 }
2825
2826 //______________________________________________________________________________________________
2827 Bool_t AliShuttle::UpdateTableFailCase()
2828 {
2829         // Update FXS table filling time_processed field in all rows corresponding to current run and detector
2830         // this is called in case the preprocessor is declared failed for the current run, because
2831         // the fields are updated only in case of success
2832
2833         Bool_t result = kTRUE;
2834
2835         for (UInt_t system=0; system<3; system++)
2836         {
2837                 // check connection, in case connect
2838                 if (!Connect(system))
2839                 {
2840                         Log(fCurrentDetector, Form("UpdateTableFailCase - Couldn't connect to %s FXS database",
2841                                                         GetSystemName(system)));
2842                         result = kFALSE;
2843                         continue;
2844                 }
2845
2846                 TTimeStamp now; // now
2847
2848                 // Loop on FXS list entries
2849
2850                 TString whereClause = Form("where run=%d and detector=\"%s\";",
2851                                                 GetCurrentRun(), fCurrentDetector.Data());
2852
2853
2854                 TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
2855                                                         now.GetSec(), whereClause.Data());
2856
2857                 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2858
2859                 // Query execution
2860                 TSQLResult* aResult;
2861                 aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2862                 if (!aResult)
2863                 {
2864                         Log(fCurrentDetector, Form("UpdateTableFailCase - %s db: can't execute SQL query <%s>",
2865                                                         GetSystemName(system), sqlQuery.Data()));
2866                         result = kFALSE;
2867                         continue;
2868                 }
2869                 delete aResult;
2870         }
2871
2872         return result;
2873 }
2874
2875 //______________________________________________________________________________________________
2876 Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
2877 {
2878         //
2879         // Update Shuttle logbook filling detector or shuttle_done column
2880         // ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
2881         //
2882
2883         // check connection, in case connect
2884         if(!Connect(3)){
2885                 Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook.");
2886                 return kFALSE;
2887         }
2888
2889         TString detName(detector);
2890         TString setClause;
2891         if (detName == "shuttle_done" || detName == "shuttle_ignored")
2892         {
2893                 setClause = "set shuttle_done=1";
2894
2895                 if (detName == "shuttle_done")
2896                 {
2897                         // Send the information to ML
2898                         TMonaLisaText  mlStatus("SHUTTLE_status", "Done");
2899
2900                         TList mlList;
2901                         mlList.Add(&mlStatus);
2902                 
2903                         TString mlID;
2904                         mlID.Form("%d", GetCurrentRun());
2905                         fMonaLisa->SendParameters(&mlList, mlID);
2906                 }
2907         } else {
2908                 TString statusStr(status);
2909                 if(statusStr.Contains("done", TString::kIgnoreCase) ||
2910                    statusStr.Contains("failed", TString::kIgnoreCase)){
2911                         setClause = Form("set %s=\"%s\"", detector, status);
2912                 } else {
2913                         Log("SHUTTLE",
2914                                 Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s",
2915                                         status, detector));
2916                         return kFALSE;
2917                 }
2918         }
2919
2920         TString whereClause = Form("where run=%d", GetCurrentRun());
2921
2922         TString sqlQuery = Form("update %s %s %s",
2923                                         fConfig->GetShuttlelbTable(), setClause.Data(), whereClause.Data());
2924
2925         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2926
2927         // Query execution
2928         TSQLResult* aResult;
2929         aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
2930         if (!aResult) {
2931                 Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data()));
2932                 return kFALSE;
2933         }
2934         delete aResult;
2935
2936         return kTRUE;
2937 }
2938
2939 //______________________________________________________________________________________________
2940 Int_t AliShuttle::GetCurrentRun() const
2941 {
2942         //
2943         // Get current run from logbook entry
2944         //
2945
2946         return fLogbookEntry ? fLogbookEntry->GetRun() : -1;
2947 }
2948
2949 //______________________________________________________________________________________________
2950 UInt_t AliShuttle::GetCurrentStartTime() const
2951 {
2952         //
2953         // get current start time
2954         //
2955
2956         return fLogbookEntry ? fLogbookEntry->GetStartTime() : 0;
2957 }
2958
2959 //______________________________________________________________________________________________
2960 UInt_t AliShuttle::GetCurrentEndTime() const
2961 {
2962         //
2963         // get current end time from logbook entry
2964         //
2965
2966         return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
2967 }
2968
2969 //______________________________________________________________________________________________
2970 UInt_t AliShuttle::GetCurrentYear() const
2971 {
2972         //
2973         // Get current year from logbook entry
2974         //
2975
2976         if (!fLogbookEntry) return 0;
2977         
2978         TTimeStamp startTime(GetCurrentStartTime());
2979         TString year =  Form("%d",startTime.GetDate());
2980         year = year(0,4);
2981         
2982         return year.Atoi();
2983 }
2984
2985 //______________________________________________________________________________________________
2986 const char* AliShuttle::GetLHCPeriod() const
2987 {
2988         //
2989         // Get current LHC period from logbook entry
2990         //
2991
2992         if (!fLogbookEntry) return 0;
2993                 
2994         return fLogbookEntry->GetRunParameter("LHCperiod");
2995 }
2996
2997 //______________________________________________________________________________________________
2998 void AliShuttle::Log(const char* detector, const char* message)
2999 {
3000         //
3001         // Fill log string with a message
3002         //
3003
3004         TString logRunDir = GetShuttleLogDir();
3005         if (GetCurrentRun() >=0)
3006                 logRunDir += Form("/%d", GetCurrentRun());
3007         
3008         void* dir = gSystem->OpenDirectory(logRunDir.Data());
3009         if (dir == NULL) {
3010                 if (gSystem->mkdir(logRunDir.Data(), kTRUE)) {
3011                         AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
3012                         return;
3013                 }
3014
3015         } else {
3016                 gSystem->FreeDirectory(dir);
3017         }
3018
3019         TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
3020         if (GetCurrentRun() >= 0) 
3021                 toLog += Form("run %d - ", GetCurrentRun());
3022         toLog += Form("%s", message);
3023
3024         AliInfo(toLog.Data());
3025         
3026         // if we redirect the log output already to the file, leave here
3027         if (fOutputRedirected && strcmp(detector, "SHUTTLE") != 0)
3028                 return;
3029
3030         TString fileName = GetLogFileName(detector);
3031         
3032         gSystem->ExpandPathName(fileName);
3033
3034         ofstream logFile;
3035         logFile.open(fileName, ofstream::out | ofstream::app);
3036
3037         if (!logFile.is_open()) {
3038                 AliError(Form("Could not open file %s", fileName.Data()));
3039                 return;
3040         }
3041
3042         logFile << toLog.Data() << "\n";
3043
3044         logFile.close();
3045 }
3046
3047 //______________________________________________________________________________________________
3048 TString AliShuttle::GetLogFileName(const char* detector) const
3049 {
3050         // 
3051         // returns the name of the log file for a given sub detector
3052         //
3053         
3054         TString fileName;
3055         
3056         if (GetCurrentRun() >= 0) 
3057         {
3058                 fileName.Form("%s/%d/%s_%d.log", GetShuttleLogDir(), GetCurrentRun(), 
3059                         detector, GetCurrentRun());
3060         } else {
3061                 fileName.Form("%s/%s.log", GetShuttleLogDir(), detector);
3062         }
3063
3064         return fileName;
3065 }
3066
3067 //______________________________________________________________________________________________
3068 void AliShuttle::SendAlive()
3069 {
3070         // sends alive message to ML
3071         
3072         TMonaLisaText mlStatus("SHUTTLE_status", "Alive");
3073
3074         TList mlList;
3075         mlList.Add(&mlStatus);
3076
3077         fMonaLisa->SendParameters(&mlList, "__PROCESSINGINFO__");
3078 }
3079
3080 //______________________________________________________________________________________________
3081 Bool_t AliShuttle::Collect(Int_t run)
3082 {
3083         //
3084         // Collects conditions data for all UNPROCESSED run written to DAQ LogBook in case of run = -1 (default)
3085         // If a dedicated run is given this run is processed
3086         //
3087         // In operational mode, this is the Shuttle function triggered by the EOR signal.
3088         //
3089
3090         if (run == -1)
3091                 Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs");
3092         else
3093                 Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run));
3094
3095         SetLastAction("Starting");
3096
3097         // create ML instance
3098         if (!fMonaLisa)
3099                 fMonaLisa = new TMonaLisaWriter(fConfig->GetMonitorHost(), fConfig->GetMonitorTable());
3100                 
3101         SendAlive();
3102         CountOpenRuns();
3103
3104         TString whereClause("where shuttle_done=0");
3105         if (run != -1)
3106                 whereClause += Form(" and run=%d", run);
3107
3108         TObjArray shuttleLogbookEntries;
3109         if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries))
3110         {
3111                 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
3112                 return kFALSE;
3113         }
3114
3115         if (shuttleLogbookEntries.GetEntries() == 0)
3116         {
3117                 if (run == -1)
3118                         Log("SHUTTLE","Collect - Found no UNPROCESSED runs in Shuttle logbook");
3119                 else
3120                         Log("SHUTTLE", Form("Collect - Run %d is already DONE "
3121                                                 "or it does not exist in Shuttle logbook", run));
3122                 return kTRUE;
3123         }
3124
3125         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
3126                 fFirstUnprocessed[iDet] = kTRUE;
3127
3128         if (run != -1)
3129         {
3130                 // query Shuttle logbook for earlier runs, check if some detectors are unprocessed,
3131                 // flag them into fFirstUnprocessed array
3132                 TString whereClause(Form("where shuttle_done=0 and run < %d", run));
3133                 TObjArray tmpLogbookEntries;
3134                 if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries))
3135                 {
3136                         Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
3137                         return kFALSE;
3138                 }
3139
3140                 TIter iter(&tmpLogbookEntries);
3141                 AliShuttleLogbookEntry* anEntry = 0;
3142                 while ((anEntry = dynamic_cast<AliShuttleLogbookEntry*> (iter.Next())))
3143                 {
3144                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
3145                         {
3146                                 if (anEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
3147                                 {
3148                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
3149                                                         anEntry->GetRun(), GetDetName(iDet)));
3150                                         fFirstUnprocessed[iDet] = kFALSE;
3151                                 }
3152                         }
3153
3154                 }
3155
3156         }
3157
3158         if (!RetrieveConditionsData(shuttleLogbookEntries))
3159         {
3160                 Log("SHUTTLE", "Collect - Process of at least one run failed");
3161                 return kFALSE;
3162         }
3163
3164         Log("SHUTTLE", "Collect - Requested run(s) successfully processed");
3165         return kTRUE;
3166 }
3167
3168 //______________________________________________________________________________________________
3169 Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries)
3170 {
3171         //
3172         // Retrieve conditions data for all runs that aren't processed yet
3173         //
3174
3175         Bool_t hasError = kFALSE;
3176
3177         TIter iter(&dateEntries);
3178         AliShuttleLogbookEntry* anEntry;
3179
3180         while ((anEntry = (AliShuttleLogbookEntry*) iter.Next())){
3181                 if (!Process(anEntry)){
3182                         hasError = kTRUE;
3183                 }
3184
3185                 // clean SHUTTLE temp directory
3186                 //TString filename = Form("%s/*.shuttle", GetShuttleTempDir());
3187                 //RemoveFile(filename.Data());
3188         }
3189
3190         return hasError == kFALSE;
3191 }
3192
3193 //______________________________________________________________________________________________
3194 ULong_t AliShuttle::GetTimeOfLastAction() const
3195 {
3196         //
3197         // Gets time of last action
3198         //
3199
3200         ULong_t tmp;
3201
3202         fMonitoringMutex->Lock();
3203
3204         tmp = fLastActionTime;
3205
3206         fMonitoringMutex->UnLock();
3207
3208         return tmp;
3209 }
3210
3211 //______________________________________________________________________________________________
3212 const TString AliShuttle::GetLastAction() const
3213 {
3214         //
3215         // returns a string description of the last action
3216         //
3217
3218         TString tmp;
3219
3220         fMonitoringMutex->Lock();
3221         
3222         tmp = fLastAction;
3223         
3224         fMonitoringMutex->UnLock();
3225
3226         return tmp;
3227 }
3228
3229 //______________________________________________________________________________________________
3230 void AliShuttle::SetLastAction(const char* action)
3231 {
3232         //
3233         // updates the monitoring variables
3234         //
3235
3236         fMonitoringMutex->Lock();
3237
3238         fLastAction = action;
3239         fLastActionTime = time(0);
3240         
3241         fMonitoringMutex->UnLock();
3242 }
3243
3244 //______________________________________________________________________________________________
3245 const char* AliShuttle::GetRunParameter(const char* param)
3246 {
3247         //
3248         // returns run parameter read from DAQ logbook
3249         //
3250
3251         if(!fLogbookEntry) {
3252                 AliError("No logbook entry!");
3253                 return 0;
3254         }
3255
3256         return fLogbookEntry->GetRunParameter(param);
3257 }
3258
3259 //______________________________________________________________________________________________
3260 AliCDBEntry* AliShuttle::GetFromOCDB(const char* detector, const AliCDBPath& path)
3261 {
3262         //
3263         // returns object from OCDB valid for current run
3264         //
3265
3266         if (fTestMode & kErrorOCDB)
3267         {
3268                 Log(detector, "GetFromOCDB - In TESTMODE - Simulating error with OCDB");
3269                 return 0;
3270         }
3271         
3272         AliCDBStorage *sto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
3273         if (!sto)
3274         {
3275                 Log(detector, "GetFromOCDB - Cannot activate main OCDB for query!");
3276                 return 0;
3277         }
3278
3279         return dynamic_cast<AliCDBEntry*> (sto->Get(path, GetCurrentRun()));
3280 }
3281
3282 //______________________________________________________________________________________________
3283 Bool_t AliShuttle::SendMail()
3284 {
3285         //
3286         // sends a mail to the subdetector expert in case of preprocessor error
3287         //
3288         
3289         if (fTestMode != kNone)
3290                 return kTRUE;
3291
3292         TString to="";
3293         TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
3294         TObjString *anExpert=0;
3295         while ((anExpert = (TObjString*) iterExperts.Next()))
3296         {
3297                 to += Form("%s,", anExpert->GetName());
3298         }
3299         if (to.Length() > 0)
3300           to.Remove(to.Length()-1);
3301         AliDebug(2, Form("to: %s",to.Data()));
3302
3303         if (to.IsNull()) {
3304                 Log("SHUTTLE", "List of detector responsibles not yet set!");
3305                 return kFALSE;
3306         }
3307
3308         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
3309         if (dir == NULL)
3310         {
3311                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
3312                 {
3313                         Log("SHUTTLE", Form("SendMail - Can't open directory <%s>", GetShuttleLogDir()));
3314                         return kFALSE;
3315                 }
3316
3317         } else {
3318                 gSystem->FreeDirectory(dir);
3319         }
3320
3321         TString bodyFileName;
3322         bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
3323         gSystem->ExpandPathName(bodyFileName);
3324
3325         ofstream mailBody;
3326         mailBody.open(bodyFileName, ofstream::out);
3327
3328         if (!mailBody.is_open())
3329         {
3330                 Log("SHUTTLE", Form("Could not open mail body file %s", bodyFileName.Data()));
3331                 return kFALSE;
3332         }
3333
3334         TString cc="alberto.colla@cern.ch";
3335
3336         TString subject = Form("%s Shuttle preprocessor FAILED in run %d (run type = %s)!",
3337                                 fCurrentDetector.Data(), GetCurrentRun(), GetRunType());
3338         AliDebug(2, Form("subject: %s", subject.Data()));
3339
3340         TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
3341         body += Form("SHUTTLE just detected that your preprocessor "
3342                         "failed processing run %d (run type = %s)!!\n\n", 
3343                                         GetCurrentRun(), GetRunType());
3344         body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n", 
3345                                 fCurrentDetector.Data());
3346         if (fConfig->GetRunMode() == AliShuttleConfig::kTest)
3347         {
3348                 body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
3349         } else {
3350                 body += Form("\thttp://pcalimonitor.cern.ch/shuttle.jsp?instance=PROD&time=168 \n\n");
3351         }
3352         
3353         
3354         TString logFolder = "logs";
3355         if (fConfig->GetRunMode() == AliShuttleConfig::kProd) 
3356                 logFolder += "_PROD";
3357         
3358         
3359         body += Form("Find the %s log for the current run on \n\n"
3360                 "\thttp://pcalishuttle01.cern.ch:8880/%s/%d/%s_%d.log \n\n", 
3361                 fCurrentDetector.Data(), logFolder.Data(), GetCurrentRun(), 
3362                                 fCurrentDetector.Data(), GetCurrentRun());
3363         body += Form("The last 10 lines of %s log file are following:\n\n", fCurrentDetector.Data());
3364
3365         AliDebug(2, Form("Body begin: %s", body.Data()));
3366
3367         mailBody << body.Data();
3368         mailBody.close();
3369         mailBody.open(bodyFileName, ofstream::out | ofstream::app);
3370
3371         TString logFileName = Form("%s/%d/%s_%d.log", GetShuttleLogDir(), 
3372                 GetCurrentRun(), fCurrentDetector.Data(), GetCurrentRun());
3373         TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
3374         if (gSystem->Exec(tailCommand.Data()))
3375         {
3376                 mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
3377         }
3378
3379         TString endBody = Form("------------------------------------------------------\n\n");
3380         endBody += Form("In case of problems please contact the SHUTTLE core team.\n\n");
3381         endBody += "Please do not answer this message directly, it is automatically generated.\n\n";
3382         endBody += "Greetings,\n\n \t\t\tthe SHUTTLE\n";
3383
3384         AliDebug(2, Form("Body end: %s", endBody.Data()));
3385
3386         mailBody << endBody.Data();
3387
3388         mailBody.close();
3389
3390         // send mail!
3391         TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
3392                                                 subject.Data(),
3393                                                 cc.Data(),
3394                                                 to.Data(),
3395                                                 bodyFileName.Data());
3396         AliDebug(2, Form("mail command: %s", mailCommand.Data()));
3397
3398         Bool_t result = gSystem->Exec(mailCommand.Data());
3399
3400         return result == 0;
3401 }
3402
3403 //______________________________________________________________________________________________
3404 Bool_t AliShuttle::SendMailToDCS()
3405 {
3406         //
3407         // sends a mail to the DCS experts in case of DCS error
3408         //
3409         
3410         if (fTestMode != kNone)
3411                 return kTRUE;
3412
3413         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
3414         if (dir == NULL)
3415         {
3416                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
3417                 {
3418                         Log("SHUTTLE", Form("SendMailToDCS - Can't open directory <%s>", GetShuttleLogDir()));
3419                         return kFALSE;
3420                 }
3421
3422         } else {
3423                 gSystem->FreeDirectory(dir);
3424         }
3425
3426         TString bodyFileName;
3427         bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
3428         gSystem->ExpandPathName(bodyFileName);
3429
3430         ofstream mailBody;
3431         mailBody.open(bodyFileName, ofstream::out);
3432
3433         if (!mailBody.is_open())
3434         {
3435                 Log("SHUTTLE", Form("SendMailToDCS - Could not open mail body file %s", bodyFileName.Data()));
3436                 return kFALSE;
3437         }
3438
3439         TString to="Vladimir.Fekete@cern.ch, Svetozar.Kapusta@cern.ch";
3440         //TString to="alberto.colla@cern.ch";
3441         AliDebug(2, Form("to: %s",to.Data()));
3442
3443         if (to.IsNull()) {
3444                 Log("SHUTTLE", "List of detector responsibles not yet set!");
3445                 return kFALSE;
3446         }
3447
3448         TString cc="alberto.colla@cern.ch";
3449
3450         TString subject = Form("Retrieval of data points for %s FAILED in run %d !",
3451                                 fCurrentDetector.Data(), GetCurrentRun());
3452         AliDebug(2, Form("subject: %s", subject.Data()));
3453
3454         TString body = Form("Dear DCS experts, \n\n");
3455         body += Form("SHUTTLE couldn\'t retrieve the data points for detector %s "
3456                         "in run %d!!\n\n", fCurrentDetector.Data(), GetCurrentRun());
3457         body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n", 
3458                                 fCurrentDetector.Data());
3459         if (fConfig->GetRunMode() == AliShuttleConfig::kTest)
3460         {
3461                 body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
3462         } else {
3463                 body += Form("\thttp://pcalimonitor.cern.ch/shuttle.jsp?instance=PROD?time=168 \n\n");
3464         }
3465
3466         TString logFolder = "logs";
3467         if (fConfig->GetRunMode() == AliShuttleConfig::kProd) 
3468                 logFolder += "_PROD";
3469         
3470         
3471         body += Form("Find the %s log for the current run on \n\n"
3472                 "\thttp://pcalishuttle01.cern.ch:8880/%s/%d/%s_%d.log \n\n", 
3473                 fCurrentDetector.Data(), logFolder.Data(), GetCurrentRun(), 
3474                                 fCurrentDetector.Data(), GetCurrentRun());
3475         body += Form("The last 10 lines of %s log file are following:\n\n", fCurrentDetector.Data());
3476
3477         AliDebug(2, Form("Body begin: %s", body.Data()));
3478
3479         mailBody << body.Data();
3480         mailBody.close();
3481         mailBody.open(bodyFileName, ofstream::out | ofstream::app);
3482
3483         TString logFileName = Form("%s/%d/%s_%d.log", GetShuttleLogDir(), GetCurrentRun(),
3484                 fCurrentDetector.Data(), GetCurrentRun());
3485         TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
3486         if (gSystem->Exec(tailCommand.Data()))
3487         {
3488                 mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
3489         }
3490
3491         TString endBody = Form("------------------------------------------------------\n\n");
3492         endBody += Form("In case of problems please contact the SHUTTLE core team.\n\n");
3493         endBody += "Please do not answer this message directly, it is automatically generated.\n\n";
3494         endBody += "Greetings,\n\n \t\t\tthe SHUTTLE\n";
3495
3496         AliDebug(2, Form("Body end: %s", endBody.Data()));
3497
3498         mailBody << endBody.Data();
3499
3500         mailBody.close();
3501
3502         // send mail!
3503         TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
3504                                                 subject.Data(),
3505                                                 cc.Data(),
3506                                                 to.Data(),
3507                                                 bodyFileName.Data());
3508         AliDebug(2, Form("mail command: %s", mailCommand.Data()));
3509
3510         Bool_t result = gSystem->Exec(mailCommand.Data());
3511
3512         return result == 0;
3513 }
3514
3515 //______________________________________________________________________________________________
3516 const char* AliShuttle::GetRunType()
3517 {
3518         //
3519         // returns run type read from "run type" logbook
3520         //
3521
3522         if(!fLogbookEntry) {
3523                 AliError("No logbook entry!");
3524                 return 0;
3525         }
3526
3527         return fLogbookEntry->GetRunType();
3528 }
3529
3530 //______________________________________________________________________________________________
3531 Bool_t AliShuttle::GetHLTStatus()
3532 {
3533         // Return HLT status (ON=1 OFF=0)
3534         // Converts the HLT status from the status string read in the run logbook (not just a bool)
3535
3536         if(!fLogbookEntry) {
3537                 AliError("No logbook entry!");
3538                 return 0;
3539         }
3540
3541         // TODO implement when HLTStatus is inserted in run logbook
3542         //TString hltStatus = fLogbookEntry->GetRunParameter("HLTStatus");
3543         //if(hltStatus == "OFF") {return kFALSE};
3544
3545         return kTRUE;
3546 }
3547
3548 //______________________________________________________________________________________________
3549 void AliShuttle::SetShuttleTempDir(const char* tmpDir)
3550 {
3551         //
3552         // sets Shuttle temp directory
3553         //
3554
3555         fgkShuttleTempDir = gSystem->ExpandPathName(tmpDir);
3556 }
3557
3558 //______________________________________________________________________________________________
3559 void AliShuttle::SetShuttleLogDir(const char* logDir)
3560 {
3561         //
3562         // sets Shuttle log directory
3563         //
3564
3565         fgkShuttleLogDir = gSystem->ExpandPathName(logDir);
3566 }