]> git.uio.no Git - u/mrichter/AliRoot.git/blame - SHUTTLE/AliShuttle.cxx
consistent naming
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttle.cxx
CommitLineData
73abe331 1/**************************************************************************
2 * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
3 * *
4 * Author: The ALICE Off-line Project. *
5 * Contributors are mentioned in the code where appropriate. *
6 * *
7 * Permission to use, copy, modify and distribute this software and its *
8 * documentation strictly for non-commercial purposes is hereby granted *
9 * without fee, provided that the above copyright notice appears in all *
10 * copies and that both the copyright notice and this permission notice *
11 * appear in the supporting documentation. The authors make no claims *
12 * about the suitability of this software for any purpose. It is *
13 * provided "as is" without express or implied warranty. *
14 **************************************************************************/
15
16/*
17$Log$
b5f1c82e 18Revision 1.72 2007/12/13 15:44:28 acolla
19Run type added in mail sent to detector expert (eases understanding)
20
6a1146c4 21Revision 1.71 2007/12/12 14:56:14 jgrosseo
22sending shuttle_ignore to ML also in case of 0 events
23
97e3c167 24Revision 1.70 2007/12/12 13:45:35 acolla
25Monalisa started in Collect() function. Alive message to monitor is sent at each Collect and every minute during preprocessor processing.
26
ee6f7523 27Revision 1.69 2007/12/12 10:06:29 acolla
28in AliShuttle.cxx: SHUTTLE logbook is updated in case of invalid run times:
29
30time_start==0 && time_end==0
31
32logbook is NOT updated if time_start != 0 && time_end == 0, because it may mean that the run is still ongoing.
33
1abfbb60 34Revision 1.68 2007/12/11 10:15:17 acolla
35Added marking SHUTTLE=DONE for invalid runs
36(invalid start time or end time) and runs with totalEvents < 1
37
b0e53b15 38Revision 1.67 2007/12/07 19:14:36 acolla
39in AliShuttleTrigger:
40
41Added automatic collection of new runs on a regular time basis (settable from the configuration)
42
43in AliShuttleConfig: new members
44
45- triggerWait: time to wait for DIM trigger (s) before starting automatic collection of new runs
46- mode: run mode (test, prod) -> used to build log folder (logs or logs_PROD)
47
48in AliShuttle:
49
50- logs now stored in logs/#RUN/DET_#RUN.log
51
7d4cf768 52Revision 1.66 2007/12/05 10:45:19 jgrosseo
53changed order of arguments to TMonaLisaWriter
54
468df1ce 55Revision 1.65 2007/11/26 16:58:37 acolla
56Monalisa configuration added: host and table name
57
b832ec02 58Revision 1.64 2007/11/13 16:15:47 acolla
59DCS map is stored in a file in the temp folder where the detector is processed.
60If the preprocessor fails, the temp folder is not removed. This will help the debugging of the problem.
61
b1d18693 62Revision 1.63 2007/11/02 10:53:16 acolla
63Protection added to AliShuttle::CopyFileLocally
64
7d43a416 65Revision 1.62 2007/10/31 18:23:13 acolla
66Furter developement on the Shuttle:
67
68- Shuttle now connects to the Grid as alidaq. The OCDB and Reference folders
69are now built from /alice/data, e.g.:
70/alice/data/2007/LHC07a/OCDB
71
72the year and LHC period are taken from the Shuttle.
73Raw metadata files are stored by GRP to:
74/alice/data/2007/LHC07a/<runNb>/Raw/RunMetadata.root
75
76- Shuttle sends a mail to DCS experts each time DP retrieval fails.
77
675f64cd 78Revision 1.61 2007/10/30 20:33:51 acolla
79Improved managing of temporary folders, which weren't correctly handled.
80Resolved bug introduced in StoreReferenceFile, which caused SPD preprocessor fail.
81
d524ade6 82Revision 1.60 2007/10/29 18:06:16 acolla
83
84New function StoreRunMetadataFile added to preprocessor and Shuttle interface
85This function can be used by GRP only. It stores raw data tags merged file to the
86raw data folder (e.g. /alice/data/2008/LHC08a/000099999/Raw).
87
88KNOWN ISSUES:
89
901. Shuttle cannot write to /alice/data/ because it belongs to alidaq. Tag file is stored in /alice/simulation/... for the time being.
912. Due to a bug in TAlien::Mkdir, the creation of a folder in recursive mode (-p option) does not work. The problem
92has been corrected in the root package on the Shuttle machine.
93
c88ad5db 94Revision 1.59 2007/10/05 12:40:55 acolla
95
96Result error code added to AliDCSClient data members (it was "lost" with the new implementation of TMap* GetAliasValues and GetDPValues).
97
1790d4b7 98Revision 1.58 2007/09/28 15:27:40 acolla
99
100AliDCSClient "multiSplit" option added in the DCS configuration
101in AliDCSMessage: variable MAX_BODY_SIZE set to 500000
102
542b6cc8 103Revision 1.57 2007/09/27 16:53:13 acolla
104Detectors can have more than one AMANDA server. SHUTTLE queries the servers sequentially,
105merges the dcs aliases/DPs in one TMap and sends it to the preprocessor.
106
1d172743 107Revision 1.56 2007/09/14 16:46:14 jgrosseo
1081) Connect and Close are called before and after each query, so one can
109keep the same AliDCSClient object.
1102) The splitting of a query is moved to GetDPValues/GetAliasValues.
1113) Splitting interval can be specified in constructor
112
b41b252a 113Revision 1.55 2007/08/06 12:26:40 acolla
114Function Bool_t GetHLTStatus added to preprocessor. It returns the status of HLT
115read from the run logbook.
116
4859271b 117Revision 1.54 2007/07/12 09:51:25 jgrosseo
118removed duplicated log message in GetFile
119
4f0749a8 120Revision 1.53 2007/07/12 09:26:28 jgrosseo
121updating hlt fxs base path
122
42fde080 123Revision 1.52 2007/07/12 08:06:45 jgrosseo
124adding log messages in getfile... functions
125adding not implemented copy constructor in alishuttleconfigholder
126
1bcd28db 127Revision 1.51 2007/07/03 17:24:52 acolla
128root moved to v5-16-00. TFileMerger->Cp moved to TFile::Cp.
129
a986b218 130Revision 1.50 2007/07/02 17:19:32 acolla
131preprocessor is run in a temp directory that is removed when process is finished.
132
5bac2bde 133Revision 1.49 2007/06/29 10:45:06 acolla
134Number of columns in MySql Shuttle logbook increased by one (HLT added)
135
db99d43e 136Revision 1.48 2007/06/21 13:06:19 acolla
137GetFileSources returns dummy list with 1 source if system=DCS (better than
138returning error as it was)
139
6297b37d 140Revision 1.47 2007/06/19 17:28:56 acolla
141HLT updated; missing map bug removed.
142
dc25836b 143Revision 1.46 2007/06/09 13:01:09 jgrosseo
144Switching to retrieval of several DCS DPs at a time (multiDPrequest)
145
a038aa70 146Revision 1.45 2007/05/30 06:35:20 jgrosseo
147Adding functionality to the Shuttle/TestShuttle:
148o) Function to retrieve list of sources from a given system (GetFileSources with id=0)
149o) Function to retrieve list of IDs for a given source (GetFileIDs)
150These functions are needed for dealing with the tag files that are saved for the GRP preprocessor
151Example code has been added to the TestProcessor in TestShuttle
152
4a33bdd9 153Revision 1.44 2007/05/11 16:09:32 acolla
154Reference files for ITS, MUON and PHOS are now stored in OfflineDetName/OnlineDetName/run_...
155example: ITS/SPD/100_filename.root
156
2d9019b4 157Revision 1.43 2007/05/10 09:59:51 acolla
158Various bug fixes in StoreRefFilesToGrid; Cleaning of reference storage before processing detector (CleanReferenceStorage)
159
546242fb 160Revision 1.42 2007/05/03 08:01:39 jgrosseo
161typo in last commit :-(
162
8b739301 163Revision 1.41 2007/05/03 08:00:48 jgrosseo
164fixing log message when pp want to skip dcs value retrieval
165
651fdaab 166Revision 1.40 2007/04/27 07:06:48 jgrosseo
167GetFileSources returns empty list in case of no files, but successful query
168No mails sent in testmode
169
86aa42c3 170Revision 1.39 2007/04/17 12:43:57 acolla
171Correction in StoreOCDB; change of text in mail to detector expert
172
26758fce 173Revision 1.38 2007/04/12 08:26:18 jgrosseo
174updated comment
175
3c2a21c8 176Revision 1.37 2007/04/10 16:53:14 jgrosseo
177redirecting sub detector stdout, stderr to sub detector log file
178
3d8bc902 179Revision 1.35 2007/04/04 16:26:38 acolla
1801. Re-organization of function calls in TestPreprocessor to make it more meaningful.
1812. Added missing dependency in test preprocessors.
1823. in AliShuttle.cxx: processing time and memory consumption info on a single line.
183
886d60e6 184Revision 1.34 2007/04/04 10:33:36 jgrosseo
1851) Storing of files to the Grid is now done _after_ your preprocessors succeeded. This is transparent, which means that you can still use the same functions (Store, StoreReferenceData) to store files to the Grid. However, the Shuttle first stores them locally and transfers them after the preprocessor finished. The return code of these two functions has changed from UInt_t to Bool_t which gives you the success of the storing.
186In case of an error with the Grid, the Shuttle will retry the storing later, the preprocessor does not need to be run again.
187
1882) The meaning of the return code of the preprocessor has changed. 0 is now success and any other value means failure. This value is stored in the log and you can use it to keep details about the error condition.
189
1903) New function StoreReferenceFile to _directly_ store a file (without opening it) to the reference storage.
191
1924) The memory usage of the preprocessor is monitored. If it exceeds 2 GB it is terminated.
193
1945) New function AliPreprocessor::ProcessDCS(). If you do not need to have DCS data in all cases, you can skip the processing by implemting this function and returning kFALSE under certain conditions. E.g. if there is a certain run type.
195If you always need DCS data (like before), you do not need to implement it.
196
1976) The run type has been added to the monitoring page
198
9827400b 199Revision 1.33 2007/04/03 13:56:01 acolla
200Grid Storage at the end of preprocessing. Added virtual method to disable DCS query according to the
201run type.
202
3301427a 203Revision 1.32 2007/02/28 10:41:56 acolla
204Run type field added in SHUTTLE framework. Run type is read from "run type" logbook and retrieved by
205AliPreprocessor::GetRunType() function.
206Added some ldap definition files.
207
d386d623 208Revision 1.30 2007/02/13 11:23:21 acolla
209Moved getters and setters of Shuttle's main OCDB/Reference, local
210OCDB/Reference, temp and log folders to AliShuttleInterface
211
9d733021 212Revision 1.27 2007/01/30 17:52:42 jgrosseo
213adding monalisa monitoring
214
e7f62f16 215Revision 1.26 2007/01/23 19:20:03 acolla
216Removed old ldif files, added TOF, MCH ldif files. Added some options in
217AliShuttleConfig::Print. Added in Ali Shuttle: SetShuttleTempDir and
218SetShuttleLogDir
219
36c99a6a 220Revision 1.25 2007/01/15 19:13:52 acolla
221Moved some AliInfo to AliDebug in SendMail function
222
fc5a4708 223Revision 1.21 2006/12/07 08:51:26 jgrosseo
224update (alberto):
225table, db names in ldap configuration
226added GRP preprocessor
227DCS data can also be retrieved by data point
228
2c15234c 229Revision 1.20 2006/11/16 16:16:48 jgrosseo
230introducing strict run ordering flag
231removed giving preprocessor name to preprocessor, they have to know their name themselves ;-)
232
be48e3ea 233Revision 1.19 2006/11/06 14:23:04 jgrosseo
234major update (Alberto)
235o) reading of run parameters from the logbook
236o) online offline naming conversion
237o) standalone DCSclient package
238
eba76848 239Revision 1.18 2006/10/20 15:22:59 jgrosseo
240o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
241o) Merging Collect, CollectAll, CollectNew function
242o) Removing implementation of empty copy constructors (declaration still there!)
243
cb343cfd 244Revision 1.17 2006/10/05 16:20:55 jgrosseo
245adapting to new CDB classes
246
6ec0e06c 247Revision 1.16 2006/10/05 15:46:26 jgrosseo
248applying to the new interface
249
481441a2 250Revision 1.15 2006/10/02 16:38:39 jgrosseo
251update (alberto):
252fixed memory leaks
253storing of objects that failed to be stored to the grid before
254interfacing of shuttle status table in daq system
255
2bb7b766 256Revision 1.14 2006/08/29 09:16:05 jgrosseo
257small update
258
85a80aa9 259Revision 1.13 2006/08/15 10:50:00 jgrosseo
260effc++ corrections (alberto)
261
4f0ab988 262Revision 1.12 2006/08/08 14:19:29 jgrosseo
263Update to shuttle classes (Alberto)
264
265- Possibility to set the full object's path in the Preprocessor's and
266Shuttle's Store functions
267- Possibility to extend the object's run validity in the same classes
268("startValidity" and "validityInfinite" parameters)
269- Implementation of the StoreReferenceData function to store reference
270data in a dedicated CDB storage.
271
84090f85 272Revision 1.11 2006/07/21 07:37:20 jgrosseo
273last run is stored after each run
274
7bfb2090 275Revision 1.10 2006/07/20 09:54:40 jgrosseo
276introducing status management: The processing per subdetector is divided into several steps,
277after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
278can keep track of the number of failures and skips further processing after a certain threshold is
279exceeded. These thresholds can be configured in LDAP.
280
5164a766 281Revision 1.9 2006/07/19 10:09:55 jgrosseo
282new configuration, accesst to DAQ FES (Alberto)
283
57f50b3c 284Revision 1.8 2006/07/11 12:44:36 jgrosseo
285adding parameters for extended validity range of data produced by preprocessor
286
17111222 287Revision 1.7 2006/07/10 14:37:09 jgrosseo
288small fix + todo comment
289
e090413b 290Revision 1.6 2006/07/10 13:01:41 jgrosseo
291enhanced storing of last sucessfully processed run (alberto)
292
a7160fe9 293Revision 1.5 2006/07/04 14:59:57 jgrosseo
294revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
295
45a493ce 296Revision 1.4 2006/06/12 09:11:16 jgrosseo
297coding conventions (Alberto)
298
58bc3020 299Revision 1.3 2006/06/06 14:26:40 jgrosseo
300o) removed files that were moved to STEER
301o) shuttle updated to follow the new interface (Alberto)
302
b948db8d 303Revision 1.2 2006/03/07 07:52:34 hristov
304New version (B.Yordanov)
305
d477ad88 306Revision 1.6 2005/11/19 17:19:14 byordano
307RetrieveDATEEntries and RetrieveConditionsData added
308
309Revision 1.5 2005/11/19 11:09:27 byordano
310AliShuttle declaration added
311
312Revision 1.4 2005/11/17 17:47:34 byordano
313TList changed to TObjArray
314
315Revision 1.3 2005/11/17 14:43:23 byordano
316import to local CVS
317
318Revision 1.1.1.1 2005/10/28 07:33:58 hristov
319Initial import as subdirectory in AliRoot
320
73abe331 321Revision 1.2 2005/09/13 08:41:15 byordano
322default startTime endTime added
323
324Revision 1.4 2005/08/30 09:13:02 byordano
325some docs added
326
327Revision 1.3 2005/08/29 21:15:47 byordano
328some docs added
329
330*/
331
332//
333// This class is the main manager for AliShuttle.
334// It organizes the data retrieval from DCS and calls the
b948db8d 335// interface methods of AliPreprocessor.
73abe331 336// For every detector in the configuration (see AliShuttleConfig),
337// data for its set of aliases is retrieved. If an AliPreprocessor
b948db8d 338// is registered for this detector, then it will be used
339// according to the schema (see AliPreprocessor).
340// If no AliPreprocessor is registered, then the retrieved
73abe331 341// data is stored automatically to the underlying AliCDBStorage.
342// The alias name is used as detSpec.
343//
344
345#include "AliShuttle.h"
346
347#include "AliCDBManager.h"
348#include "AliCDBStorage.h"
349#include "AliCDBId.h"
84090f85 350#include "AliCDBRunRange.h"
351#include "AliCDBPath.h"
5164a766 352#include "AliCDBEntry.h"
73abe331 353#include "AliShuttleConfig.h"
eba76848 354#include "DCSClient/AliDCSClient.h"
73abe331 355#include "AliLog.h"
b948db8d 356#include "AliPreprocessor.h"
5164a766 357#include "AliShuttleStatus.h"
2bb7b766 358#include "AliShuttleLogbookEntry.h"
73abe331 359
57f50b3c 360#include <TSystem.h>
58bc3020 361#include <TObject.h>
b948db8d 362#include <TString.h>
57f50b3c 363#include <TTimeStamp.h>
73abe331 364#include <TObjString.h>
57f50b3c 365#include <TSQLServer.h>
366#include <TSQLResult.h>
367#include <TSQLRow.h>
cb343cfd 368#include <TMutex.h>
9827400b 369#include <TSystemDirectory.h>
370#include <TSystemFile.h>
a986b218 371#include <TFile.h>
9827400b 372#include <TGrid.h>
373#include <TGridResult.h>
73abe331 374
e7f62f16 375#include <TMonaLisaWriter.h>
376
5164a766 377#include <fstream>
378
cb343cfd 379#include <sys/types.h>
380#include <sys/wait.h>
381
73abe331 382ClassImp(AliShuttle)
383
b948db8d 384//______________________________________________________________________________________________
385AliShuttle::AliShuttle(const AliShuttleConfig* config,
386 UInt_t timeout, Int_t retries):
4f0ab988 387fConfig(config),
388fTimeout(timeout), fRetries(retries),
389fPreprocessorMap(),
2bb7b766 390fLogbookEntry(0),
eba76848 391fCurrentDetector(),
85a80aa9 392fStatusEntry(0),
cb343cfd 393fMonitoringMutex(0),
eba76848 394fLastActionTime(0),
e7f62f16 395fLastAction(),
9827400b 396fMonaLisa(0),
397fTestMode(kNone),
ffa29e93 398fReadTestMode(kFALSE),
399fOutputRedirected(kFALSE)
73abe331 400{
401 //
402 // config: AliShuttleConfig used
73abe331 403 // timeout: timeout used for AliDCSClient connection
404 // retries: the number of retries in case of connection error.
405 //
406
57f50b3c 407 if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
be48e3ea 408 for(int iSys=0;iSys<4;iSys++) {
57f50b3c 409 fServer[iSys]=0;
be48e3ea 410 if (iSys < 3)
2c15234c 411 fFXSlist[iSys].SetOwner(kTRUE);
57f50b3c 412 }
2bb7b766 413 fPreprocessorMap.SetOwner(kTRUE);
be48e3ea 414
415 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
416 fFirstUnprocessed[iDet] = kFALSE;
417
cb343cfd 418 fMonitoringMutex = new TMutex();
58bc3020 419}
420
b948db8d 421//______________________________________________________________________________________________
57f50b3c 422AliShuttle::~AliShuttle()
58bc3020 423{
9827400b 424 //
425 // destructor
426 //
58bc3020 427
b948db8d 428 fPreprocessorMap.DeleteAll();
be48e3ea 429 for(int iSys=0;iSys<4;iSys++)
57f50b3c 430 if(fServer[iSys]) {
431 fServer[iSys]->Close();
432 delete fServer[iSys];
eba76848 433 fServer[iSys] = 0;
57f50b3c 434 }
2bb7b766 435
436 if (fStatusEntry){
437 delete fStatusEntry;
438 fStatusEntry = 0;
439 }
cb343cfd 440
441 if (fMonitoringMutex)
442 {
443 delete fMonitoringMutex;
444 fMonitoringMutex = 0;
445 }
73abe331 446}
447
b948db8d 448//______________________________________________________________________________________________
57f50b3c 449void AliShuttle::RegisterPreprocessor(AliPreprocessor* preprocessor)
58bc3020 450{
73abe331 451 //
b948db8d 452 // Registers new AliPreprocessor.
73abe331 453 // It uses GetName() for indentificator of the pre processor.
454 // The pre processor is registered it there isn't any other
455 // with the same identificator (GetName()).
456 //
457
eba76848 458 const char* detName = preprocessor->GetName();
459 if(GetDetPos(detName) < 0)
460 AliFatal(Form("********** !!!!! Invalid detector name: %s !!!!! **********", detName));
461
462 if (fPreprocessorMap.GetValue(detName)) {
463 AliWarning(Form("AliPreprocessor %s is already registered!", detName));
73abe331 464 return;
465 }
466
eba76848 467 fPreprocessorMap.Add(new TObjString(detName), preprocessor);
73abe331 468}
b948db8d 469//______________________________________________________________________________________________
3301427a 470Bool_t AliShuttle::Store(const AliCDBPath& path, TObject* object,
84090f85 471 AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite)
73abe331 472{
9827400b 473 // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for
474 // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored
475 // using this function. Use StoreReferenceData instead!
476 // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
477 // finishes the data are transferred to the main storage (Grid).
b948db8d 478
3301427a 479 return StoreLocally(fgkLocalCDB, path, object, metaData, validityStart, validityInfinite);
84090f85 480}
481
482//______________________________________________________________________________________________
3301427a 483Bool_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData)
84090f85 484{
9827400b 485 // Stores a CDB object in the storage for reference data. This objects will not be available during
486 // offline reconstrunction. Use this function for reference data only!
487 // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
488 // finishes the data are transferred to the main storage (Grid).
85a80aa9 489
3301427a 490 return StoreLocally(fgkLocalRefStorage, path, object, metaData);
85a80aa9 491}
492
493//______________________________________________________________________________________________
3301427a 494Bool_t AliShuttle::StoreLocally(const TString& localUri,
85a80aa9 495 const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
496 Int_t validityStart, Bool_t validityInfinite)
497{
9827400b 498 // Store object temporarily in local storage. Parameters are passed by Store and StoreReferenceData functions.
499 // when the preprocessor finishes the data are transferred to the main storage (Grid).
500 // The parameters are:
501 // 1) Uri of the backup storage (Local)
502 // 2) the object's path.
503 // 3) the object to be stored
504 // 4) the metaData to be associated with the object
505 // 5) the validity start run number w.r.t. the current run,
506 // if the data is valid only for this run leave the default 0
507 // 6) specifies if the calibration data is valid for infinity (this means until updated),
508 // typical for calibration runs, the default is kFALSE
509 //
510 // returns 0 if fail, 1 otherwise
84090f85 511
9827400b 512 if (fTestMode & kErrorStorage)
513 {
514 Log(fCurrentDetector, "StoreLocally - In TESTMODE - Simulating error while storing locally");
515 return kFALSE;
516 }
517
3301427a 518 const char* cdbType = (localUri == fgkLocalCDB) ? "CDB" : "Reference";
2bb7b766 519
85a80aa9 520 Int_t firstRun = GetCurrentRun() - validityStart;
84090f85 521 if(firstRun < 0) {
9827400b 522 AliWarning("First valid run happens to be less than 0! Setting it to 0.");
84090f85 523 firstRun=0;
524 }
525
526 Int_t lastRun = -1;
527 if(validityInfinite) {
528 lastRun = AliCDBRunRange::Infinity();
529 } else {
530 lastRun = GetCurrentRun();
531 }
532
3301427a 533 // Version is set to current run, it will be used later to transfer data to Grid
534 AliCDBId id(path, firstRun, lastRun, GetCurrentRun(), -1);
2bb7b766 535
536 if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){
537 TObjString runUsed = Form("%d", GetCurrentRun());
9e080f92 538 metaData->SetProperty("RunUsed(TObjString)", runUsed.Clone());
2bb7b766 539 }
84090f85 540
3301427a 541 Bool_t result = kFALSE;
84090f85 542
3301427a 543 if (!(AliCDBManager::Instance()->GetStorage(localUri))) {
544 Log("SHUTTLE", Form("StoreLocally - Cannot activate local %s storage", cdbType));
84090f85 545 } else {
3301427a 546 result = AliCDBManager::Instance()->GetStorage(localUri)
84090f85 547 ->Put(object, id, metaData);
548 }
549
550 if(!result) {
551
9827400b 552 Log(fCurrentDetector, Form("StoreLocally - Can't store object <%s>!", id.ToString().Data()));
3301427a 553 }
2bb7b766 554
3301427a 555 return result;
556}
84090f85 557
3301427a 558//______________________________________________________________________________________________
559Bool_t AliShuttle::StoreOCDB()
560{
9827400b 561 //
562 // Called when preprocessor ends successfully or when previous storage attempt failed (kStoreError status)
563 // Calls underlying StoreOCDB(const char*) function twice, for OCDB and Reference storage.
564 // Then calls StoreRefFilesToGrid to store reference files.
565 //
566
567 if (fTestMode & kErrorGrid)
568 {
569 Log("SHUTTLE", "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
570 Log(fCurrentDetector, "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
571 return kFALSE;
572 }
573
c88ad5db 574 Log("SHUTTLE","StoreOCDB - Storing OCDB data ...");
86aa42c3 575 Bool_t resultCDB = StoreOCDB(fgkMainCDB);
576
c88ad5db 577 Log("SHUTTLE","StoreOCDB - Storing reference data ...");
3301427a 578 Bool_t resultRef = StoreOCDB(fgkMainRefStorage);
9827400b 579
c88ad5db 580 Log("SHUTTLE","StoreOCDB - Storing reference files ...");
581 Bool_t resultRefFiles = CopyFilesToGrid("reference");
582
583 Bool_t resultMetadata = kTRUE;
584 if(fCurrentDetector == "GRP")
585 {
586 Log("StoreOCDB - SHUTTLE","Storing Run Metadata file ...");
587 resultMetadata = CopyFilesToGrid("metadata");
588 }
9827400b 589
c88ad5db 590 return resultCDB && resultRef && resultRefFiles && resultMetadata;
3301427a 591}
592
593//______________________________________________________________________________________________
594Bool_t AliShuttle::StoreOCDB(const TString& gridURI)
595{
596 //
597 // Called by StoreOCDB(), performs actual storage to the main OCDB and reference storages (Grid)
598 //
599
600 TObjArray* gridIds=0;
601
602 Bool_t result = kTRUE;
603
604 const char* type = 0;
605 TString localURI;
606 if(gridURI == fgkMainCDB) {
607 type = "OCDB";
608 localURI = fgkLocalCDB;
609 } else if(gridURI == fgkMainRefStorage) {
610 type = "reference";
611 localURI = fgkLocalRefStorage;
612 } else {
613 AliError(Form("Invalid storage URI: %s", gridURI.Data()));
614 return kFALSE;
615 }
616
617 AliCDBManager* man = AliCDBManager::Instance();
618
619 AliCDBStorage *gridSto = man->GetStorage(gridURI);
620 if(!gridSto) {
621 Log("SHUTTLE",
622 Form("StoreOCDB - cannot activate main %s storage", type));
623 return kFALSE;
624 }
625
626 gridIds = gridSto->GetQueryCDBList();
627
628 // get objects previously stored in local CDB
629 AliCDBStorage *localSto = man->GetStorage(localURI);
630 if(!localSto) {
631 Log("SHUTTLE",
632 Form("StoreOCDB - cannot activate local %s storage", type));
633 return kFALSE;
634 }
635 AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*");
636 // Local objects were stored with current run as Grid version!
637 TList* localEntries = localSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun());
638 localEntries->SetOwner(1);
639
640 // loop on local stored objects
641 TIter localIter(localEntries);
642 AliCDBEntry *aLocEntry = 0;
643 while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){
644 aLocEntry->SetOwner(1);
645 AliCDBId aLocId = aLocEntry->GetId();
646 aLocEntry->SetVersion(-1);
647 aLocEntry->SetSubVersion(-1);
648
649 // If local object is valid up to infinity we store it only if it is
650 // the first unprocessed run!
651 if (aLocId.GetLastRun() == AliCDBRunRange::Infinity() &&
652 !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
653 {
654 Log("SHUTTLE", Form("StoreOCDB - %s: object %s has validity infinite but "
655 "there are previous unprocessed runs!",
656 fCurrentDetector.Data(), aLocId.GetPath().Data()));
657 continue;
658 }
659
660 // loop on Grid valid Id's
661 Bool_t store = kTRUE;
662 TIter gridIter(gridIds);
663 AliCDBId* aGridId = 0;
664 while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
665 if(aGridId->GetPath() != aLocId.GetPath()) continue;
666 // skip all objects valid up to infinity
667 if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
668 // if we get here, it means there's already some more recent object stored on Grid!
669 store = kFALSE;
670 break;
671 }
672
673 // If we get here, the file can be stored!
674 Bool_t storeOk = gridSto->Put(aLocEntry);
675 if(!store || storeOk){
676
677 if (!store)
678 {
679 Log(fCurrentDetector.Data(),
680 Form("StoreOCDB - A more recent object already exists in %s storage: <%s>",
681 type, aGridId->ToString().Data()));
682 } else {
683 Log("SHUTTLE",
684 Form("StoreOCDB - Object <%s> successfully put into %s storage",
685 aLocId.ToString().Data(), type));
2d9019b4 686 Log(fCurrentDetector.Data(),
687 Form("StoreOCDB - Object <%s> successfully put into %s storage",
688 aLocId.ToString().Data(), type));
3301427a 689 }
84090f85 690
3301427a 691 // removing local filename...
692 TString filename;
693 localSto->IdToFilename(aLocId, filename);
c88ad5db 694 Log("SHUTTLE", Form("StoreOCDB - Removing local file %s", filename.Data()));
3301427a 695 RemoveFile(filename.Data());
696 continue;
697 } else {
698 Log("SHUTTLE",
699 Form("StoreOCDB - Grid %s storage of object <%s> failed",
700 type, aLocId.ToString().Data()));
2d9019b4 701 Log(fCurrentDetector.Data(),
702 Form("StoreOCDB - Grid %s storage of object <%s> failed",
703 type, aLocId.ToString().Data()));
3301427a 704 result = kFALSE;
b948db8d 705 }
706 }
3301427a 707 localEntries->Clear();
2bb7b766 708
b948db8d 709 return result;
3301427a 710}
711
546242fb 712//______________________________________________________________________________________________
713Bool_t AliShuttle::CleanReferenceStorage(const char* detector)
714{
2d9019b4 715 // clears the directory used to store reference files of a given subdetector
546242fb 716
717 AliCDBManager* man = AliCDBManager::Instance();
718 AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
2d9019b4 719 TString localBaseFolder = sto->GetBaseFolder();
720
721 TString targetDir = GetRefFilePrefix(localBaseFolder.Data(), detector);
722
d524ade6 723 Log("SHUTTLE", Form("CleanReferenceStorage - Cleaning %s", targetDir.Data()));
2d9019b4 724
725 TString begin;
726 begin.Form("%d_", GetCurrentRun());
727
728 TSystemDirectory* baseDir = new TSystemDirectory("/", targetDir);
729 if (!baseDir)
730 return kTRUE;
731
732 TList* dirList = baseDir->GetListOfFiles();
733 delete baseDir;
734
735 if (!dirList) return kTRUE;
736
737 if (dirList->GetEntries() < 3)
738 {
739 delete dirList;
740 return kTRUE;
741 }
742
743 Int_t nDirs = 0, nDel = 0;
744 TIter dirIter(dirList);
745 TSystemFile* entry = 0;
546242fb 746
2d9019b4 747 Bool_t success = kTRUE;
546242fb 748
2d9019b4 749 while ((entry = dynamic_cast<TSystemFile*> (dirIter.Next())))
750 {
751 if (entry->IsDirectory())
752 continue;
753
754 TString fileName(entry->GetName());
755 if (!fileName.BeginsWith(begin))
756 continue;
757
758 nDirs++;
759
760 // delete file
761 Int_t result = gSystem->Unlink(fileName.Data());
762
763 if (result)
764 {
d524ade6 765 Log("SHUTTLE", Form("CleanReferenceStorage - Could not delete file %s!", fileName.Data()));
2d9019b4 766 success = kFALSE;
767 } else {
768 nDel++;
769 }
770 }
771
772 if(nDirs > 0)
773 Log("SHUTTLE", Form("CleanReferenceStorage - %d (over %d) reference files in folder %s were deleted.",
774 nDel, nDirs, targetDir.Data()));
775
776
777 delete dirList;
778 return success;
779
780
781
782
783
546242fb 784
785 Int_t result = gSystem->GetPathInfo(targetDir, 0, (Long64_t*) 0, 0, 0);
786 if (result == 0)
787 {
788 // delete directory
d524ade6 789 result = gSystem->Exec(Form("rm -rf %s", targetDir.Data()));
546242fb 790 if (result != 0)
791 {
d524ade6 792 Log("SHUTTLE", Form("CleanReferenceStorage - Could not clean directory %s", targetDir.Data()));
546242fb 793 return kFALSE;
794 }
795 }
796
797 result = gSystem->mkdir(targetDir, kTRUE);
798 if (result != 0)
799 {
c88ad5db 800 Log("SHUTTLE", Form("CleanReferenceStorage - Error creating base directory %s", targetDir.Data()));
546242fb 801 return kFALSE;
802 }
803
804 return kTRUE;
805}
806
9827400b 807//______________________________________________________________________________________________
808Bool_t AliShuttle::StoreReferenceFile(const char* detector, const char* localFile, const char* gridFileName)
809{
810 //
3c2a21c8 811 // Stores reference file directly (without opening it). This function stores the file locally.
9827400b 812 //
3c2a21c8 813 // The file is stored under the following location:
814 // <base folder of local reference storage>/<DET>/<RUN#>_<gridFileName>
815 // where <gridFileName> is the second parameter given to the function
816 //
9827400b 817
818 if (fTestMode & kErrorStorage)
819 {
820 Log(fCurrentDetector, "StoreReferenceFile - In TESTMODE - Simulating error while storing locally");
821 return kFALSE;
822 }
823
824 AliCDBManager* man = AliCDBManager::Instance();
825 AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
826
827 TString localBaseFolder = sto->GetBaseFolder();
828
d524ade6 829 TString target = GetRefFilePrefix(localBaseFolder.Data(), detector);
830 target.Append(Form("/%d_%s", GetCurrentRun(), gridFileName));
9827400b 831
d524ade6 832 return CopyFileLocally(localFile, target);
c88ad5db 833}
834
835//______________________________________________________________________________________________
836Bool_t AliShuttle::StoreRunMetadataFile(const char* localFile, const char* gridFileName)
837{
838 //
839 // Stores Run metadata file to the Grid, in the run folder
840 //
841 // Only GRP can call this function.
842
843 if (fTestMode & kErrorStorage)
844 {
845 Log(fCurrentDetector, "StoreRunMetaDataFile - In TESTMODE - Simulating error while storing locally");
846 return kFALSE;
847 }
848
849 AliCDBManager* man = AliCDBManager::Instance();
850 AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
851
852 TString localBaseFolder = sto->GetBaseFolder();
853
854 // Build Run level folder
855 // folder = /alice/data/year/lhcPeriod/runNb/Raw
856
c88ad5db 857
675f64cd 858 TString lhcPeriod = GetLHCPeriod();
c88ad5db 859 if (lhcPeriod.Length() == 0)
860 {
861 Log("SHUTTLE","StoreRunMetaDataFile - LHCPeriod not found in logbook!");
862 return 0;
863 }
864
675f64cd 865 TString target = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/Raw/%s",
866 localBaseFolder.Data(), GetCurrentYear(),
d524ade6 867 lhcPeriod.Data(), GetCurrentRun(), gridFileName);
c88ad5db 868
d524ade6 869 return CopyFileLocally(localFile, target);
c88ad5db 870}
871
872//______________________________________________________________________________________________
d524ade6 873Bool_t AliShuttle::CopyFileLocally(const char* localFile, const TString& target)
c88ad5db 874{
875 //
876 // Stores file locally. Called by StoreReferenceFile and StoreRunMetadataFile
d524ade6 877 // Files are temporarily stored in the local reference storage. When the preprocessor
878 // finishes, the Shuttle calls CopyFilesToGrid to transfer the files to AliEn
879 // (in reference or run level folders)
c88ad5db 880 //
881
d524ade6 882 TString targetDir(target(0, target.Last('/')));
883
884 //try to open base dir folder, if it does not exist
2d9019b4 885 void* dir = gSystem->OpenDirectory(targetDir.Data());
886 if (dir == NULL) {
887 if (gSystem->mkdir(targetDir.Data(), kTRUE)) {
c88ad5db 888 Log("SHUTTLE", Form("StoreFileLocally - Can't open directory <%s>", targetDir.Data()));
2d9019b4 889 return kFALSE;
890 }
891
892 } else {
893 gSystem->FreeDirectory(dir);
894 }
9827400b 895
7d43a416 896 Int_t result = 0;
897
898 result = gSystem->GetPathInfo(localFile, 0, (Long64_t*) 0, 0, 0);
9827400b 899 if (result)
900 {
c88ad5db 901 Log("SHUTTLE", Form("StoreFileLocally - %s does not exist", localFile));
546242fb 902 return kFALSE;
9827400b 903 }
546242fb 904
7d43a416 905 result = gSystem->GetPathInfo(target, 0, (Long64_t*) 0, 0, 0);
906 if (!result)
907 {
908 Log("SHUTTLE", Form("StoreFileLocally - target file %s already exist, removing...", target.Data()));
909 if (gSystem->Unlink(target.Data()))
910 {
911 Log("SHUTTLE", Form("StoreFileLocally - Could not remove existing target file %s!", target.Data()));
912 return kFALSE;
913 }
914 }
915
9827400b 916 result = gSystem->CopyFile(localFile, target);
917
918 if (result == 0)
919 {
c88ad5db 920 Log("SHUTTLE", Form("StoreFileLocally - File %s stored locally to %s", localFile, target.Data()));
9827400b 921 return kTRUE;
922 }
923 else
924 {
d524ade6 925 Log("SHUTTLE", Form("StoreFileLocally - Could not store file %s to %s! Error code = %d",
546242fb 926 localFile, target.Data(), result));
9827400b 927 return kFALSE;
928 }
c88ad5db 929
930
931
9827400b 932}
933
934//______________________________________________________________________________________________
c88ad5db 935Bool_t AliShuttle::CopyFilesToGrid(const char* type)
9827400b 936{
937 //
c88ad5db 938 // Transfers local files to the Grid. Local files can be reference files
939 // or run metadata file (from GRP only).
9827400b 940 //
c88ad5db 941 // According to the type (ref, metadata) the files are stored under the following location:
942 // ref --> <base folder of reference storage>/<DET>/<RUN#>_<gridFileName>
943 // metadata --> <run data folder>/<MetadataFileName>
86aa42c3 944 //
c88ad5db 945
9827400b 946 AliCDBManager* man = AliCDBManager::Instance();
947 AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
948 if (!sto)
949 return kFALSE;
950 TString localBaseFolder = sto->GetBaseFolder();
9827400b 951
c88ad5db 952 TString dir;
953 TString alienDir;
9827400b 954 TString begin;
9827400b 955
c88ad5db 956 if (strcmp(type, "reference") == 0)
957 {
958 dir = GetRefFilePrefix(localBaseFolder.Data(), fCurrentDetector.Data());
959 AliCDBStorage* gridSto = man->GetStorage(fgkMainRefStorage);
960 if (!gridSto)
961 return kFALSE;
962 TString gridBaseFolder = gridSto->GetBaseFolder();
963 alienDir = GetRefFilePrefix(gridBaseFolder.Data(), fCurrentDetector.Data());
964 begin = Form("%d_", GetCurrentRun());
965 }
966 else if (strcmp(type, "metadata") == 0)
967 {
c88ad5db 968
675f64cd 969 TString lhcPeriod = GetLHCPeriod();
c88ad5db 970
971 if (lhcPeriod.Length() == 0)
972 {
973 Log("SHUTTLE","CopyFilesToGrid - LHCPeriod not found in logbook!");
974 return 0;
975 }
976
675f64cd 977 dir = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/Raw",
978 localBaseFolder.Data(), GetCurrentYear(),
c88ad5db 979 lhcPeriod.Data(), GetCurrentRun());
675f64cd 980 alienDir = dir(dir.Index("/alice/data/"), dir.Length());
981
c88ad5db 982 begin = "";
983 }
984 else
985 {
986 Log("SHUTTLE", "CopyFilesToGrid - Unexpected: type label must be reference or metadata!");
987 return kFALSE;
988 }
989
9827400b 990 TSystemDirectory* baseDir = new TSystemDirectory("/", dir);
3d8bc902 991 if (!baseDir)
992 return kTRUE;
993
2d9019b4 994 TList* dirList = baseDir->GetListOfFiles();
995 delete baseDir;
996
997 if (!dirList) return kTRUE;
998
999 if (dirList->GetEntries() < 3)
3d8bc902 1000 {
2d9019b4 1001 delete dirList;
9827400b 1002 return kTRUE;
3d8bc902 1003 }
2d9019b4 1004
546242fb 1005 if (!gGrid)
1006 {
c88ad5db 1007 Log("SHUTTLE", "CopyFilesToGrid - Connection to Grid failed: Cannot continue!");
2d9019b4 1008 delete dirList;
546242fb 1009 return kFALSE;
1010 }
1011
2d9019b4 1012 Int_t nDirs = 0, nTransfer = 0;
1013 TIter dirIter(dirList);
1014 TSystemFile* entry = 0;
1015
9827400b 1016 Bool_t success = kTRUE;
3d8bc902 1017 Bool_t first = kTRUE;
9827400b 1018
2d9019b4 1019 while ((entry = dynamic_cast<TSystemFile*> (dirIter.Next())))
1020 {
9827400b 1021 if (entry->IsDirectory())
1022 continue;
1023
1024 TString fileName(entry->GetName());
1025 if (!fileName.BeginsWith(begin))
1026 continue;
1027
2d9019b4 1028 nDirs++;
1029
3d8bc902 1030 if (first)
1031 {
1032 first = kFALSE;
c88ad5db 1033 // check that folder exists, otherwise create it
3d8bc902 1034 TGridResult* result = gGrid->Ls(alienDir.Data(), "a");
1035
1036 if (!result)
2d9019b4 1037 {
1038 delete dirList;
3d8bc902 1039 return kFALSE;
2d9019b4 1040 }
3d8bc902 1041
546242fb 1042 if (!result->GetFileName(1)) // TODO: It looks like element 0 is always 0!!
3d8bc902 1043 {
675f64cd 1044 // TODO It does not work currently! Bug in TAliEn::Mkdir
1045 // TODO Manually fixed in local root v5-16-00
c88ad5db 1046 if (!gGrid->Mkdir(alienDir.Data(),"-p",0))
3d8bc902 1047 {
c88ad5db 1048 Log("SHUTTLE", Form("CopyFilesToGrid - Cannot create directory %s",
3d8bc902 1049 alienDir.Data()));
2d9019b4 1050 delete dirList;
3d8bc902 1051 return kFALSE;
546242fb 1052 } else {
c88ad5db 1053 Log("SHUTTLE",Form("CopyFilesToGrid - Folder %s created", alienDir.Data()));
3d8bc902 1054 }
1055
546242fb 1056 } else {
c88ad5db 1057 Log("SHUTTLE",Form("CopyFilesToGrid - Folder %s found", alienDir.Data()));
3d8bc902 1058 }
1059 }
1060
9827400b 1061 TString fullLocalPath;
1062 fullLocalPath.Form("%s/%s", dir.Data(), fileName.Data());
1063
1064 TString fullGridPath;
1065 fullGridPath.Form("alien://%s/%s", alienDir.Data(), fileName.Data());
1066
a986b218 1067 Bool_t result = TFile::Cp(fullLocalPath, fullGridPath);
9827400b 1068
1069 if (result)
1070 {
c88ad5db 1071 Log("SHUTTLE", Form("CopyFilesToGrid - Copying local file %s to %s succeeded!",
1072 fullLocalPath.Data(), fullGridPath.Data()));
9827400b 1073 RemoveFile(fullLocalPath);
2d9019b4 1074 nTransfer++;
9827400b 1075 }
1076 else
1077 {
c88ad5db 1078 Log("SHUTTLE", Form("CopyFilesToGrid - Copying local file %s to %s FAILED!",
1079 fullLocalPath.Data(), fullGridPath.Data()));
9827400b 1080 success = kFALSE;
1081 }
1082 }
2d9019b4 1083
c88ad5db 1084 Log("SHUTTLE", Form("CopyFilesToGrid - %d (over %d) files in folder %s copied to Grid.",
1085 nTransfer, nDirs, dir.Data()));
2d9019b4 1086
1087
1088 delete dirList;
9827400b 1089 return success;
1090}
1091
2d9019b4 1092//______________________________________________________________________________________________
1093const char* AliShuttle::GetRefFilePrefix(const char* base, const char* detector)
1094{
1095 //
1096 // Get folder name of reference files
1097 //
1098
1099 TString offDetStr(GetOfflineDetName(detector));
1100 TString dir;
1101 if (offDetStr == "ITS" || offDetStr == "MUON" || offDetStr == "PHOS")
1102 {
1103 dir.Form("%s/%s/%s", base, offDetStr.Data(), detector);
1104 } else {
1105 dir.Form("%s/%s", base, offDetStr.Data());
1106 }
1107
1108 return dir.Data();
1109
1110
1111}
c88ad5db 1112
3301427a 1113//______________________________________________________________________________________________
1114void AliShuttle::CleanLocalStorage(const TString& uri)
1115{
9827400b 1116 //
1117 // Called in case the preprocessor is declared failed. Remove remaining objects from the local storages.
1118 //
3301427a 1119
1120 const char* type = 0;
1121 if(uri == fgkLocalCDB) {
1122 type = "OCDB";
1123 } else if(uri == fgkLocalRefStorage) {
546242fb 1124 type = "Reference";
3301427a 1125 } else {
1126 AliError(Form("Invalid storage URI: %s", uri.Data()));
1127 return;
1128 }
1129
1130 AliCDBManager* man = AliCDBManager::Instance();
b948db8d 1131
3301427a 1132 // open local storage
1133 AliCDBStorage *localSto = man->GetStorage(uri);
1134 if(!localSto) {
1135 Log("SHUTTLE",
1136 Form("CleanLocalStorage - cannot activate local %s storage", type));
1137 return;
1138 }
1139
1140 TString filename(Form("%s/%s/*/Run*_v%d_s*.root",
546242fb 1141 localSto->GetBaseFolder().Data(), GetOfflineDetName(fCurrentDetector.Data()), GetCurrentRun()));
3301427a 1142
c88ad5db 1143 AliDebug(2, Form("filename = %s", filename.Data()));
3301427a 1144
c88ad5db 1145 Log("SHUTTLE", Form("Removing remaining local files for run %d and detector %s ...",
3301427a 1146 GetCurrentRun(), fCurrentDetector.Data()));
1147
1148 RemoveFile(filename.Data());
1149
1150}
1151
1152//______________________________________________________________________________________________
1153void AliShuttle::RemoveFile(const char* filename)
1154{
9827400b 1155 //
1156 // removes local file
1157 //
3301427a 1158
1159 TString command(Form("rm -f %s", filename));
1160
1161 Int_t result = gSystem->Exec(command.Data());
1162 if(result != 0)
1163 {
1164 Log("SHUTTLE", Form("RemoveFile - %s: Cannot remove file %s!",
1165 fCurrentDetector.Data(), filename));
1166 }
73abe331 1167}
1168
b948db8d 1169//______________________________________________________________________________________________
5164a766 1170AliShuttleStatus* AliShuttle::ReadShuttleStatus()
1171{
9827400b 1172 //
1173 // Reads the AliShuttleStatus from the CDB
1174 //
5164a766 1175
2bb7b766 1176 if (fStatusEntry){
1177 delete fStatusEntry;
1178 fStatusEntry = 0;
1179 }
5164a766 1180
10a5a932 1181 fStatusEntry = AliCDBManager::Instance()->GetStorage(GetLocalCDB())
2bb7b766 1182 ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
5164a766 1183
2bb7b766 1184 if (!fStatusEntry) return 0;
1185 fStatusEntry->SetOwner(1);
5164a766 1186
2bb7b766 1187 AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1188 if (!status) {
1189 AliError("Invalid object stored to CDB!");
1190 return 0;
1191 }
5164a766 1192
2bb7b766 1193 return status;
5164a766 1194}
1195
1196//______________________________________________________________________________________________
7bfb2090 1197Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status)
5164a766 1198{
9827400b 1199 //
1200 // writes the status for one subdetector
1201 //
2bb7b766 1202
1203 if (fStatusEntry){
1204 delete fStatusEntry;
1205 fStatusEntry = 0;
1206 }
5164a766 1207
2bb7b766 1208 Int_t run = GetCurrentRun();
5164a766 1209
2bb7b766 1210 AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
5164a766 1211
2bb7b766 1212 fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
1213 fStatusEntry->SetOwner(1);
5164a766 1214
2bb7b766 1215 UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
7bfb2090 1216
2bb7b766 1217 if (!result) {
3301427a 1218 Log("SHUTTLE", Form("WriteShuttleStatus - Failed for %s, run %d",
1219 fCurrentDetector.Data(), run));
2bb7b766 1220 return kFALSE;
1221 }
e7f62f16 1222
1223 SendMLInfo();
7bfb2090 1224
2bb7b766 1225 return kTRUE;
5164a766 1226}
1227
1228//______________________________________________________________________________________________
1229void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount)
1230{
9827400b 1231 //
1232 // changes the AliShuttleStatus for the given detector and run to the given status
1233 //
5164a766 1234
2bb7b766 1235 if (!fStatusEntry){
1236 AliError("UNEXPECTED: fStatusEntry empty");
1237 return;
1238 }
5164a766 1239
2bb7b766 1240 AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
5164a766 1241
2bb7b766 1242 if (!status){
c88ad5db 1243 Log("SHUTTLE", "UpdateShuttleStatus - UNEXPECTED: status could not be read from current CDB entry");
2bb7b766 1244 return;
1245 }
5164a766 1246
2c15234c 1247 TString actionStr = Form("UpdateShuttleStatus - %s: Changing state from %s to %s",
eba76848 1248 fCurrentDetector.Data(),
36c99a6a 1249 status->GetStatusName(),
eba76848 1250 status->GetStatusName(newStatus));
cb343cfd 1251 Log("SHUTTLE", actionStr);
1252 SetLastAction(actionStr);
5164a766 1253
2bb7b766 1254 status->SetStatus(newStatus);
1255 if (increaseCount) status->IncreaseCount();
5164a766 1256
2bb7b766 1257 AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
e7f62f16 1258
1259 SendMLInfo();
5164a766 1260}
e7f62f16 1261
1262//______________________________________________________________________________________________
1263void AliShuttle::SendMLInfo()
1264{
1265 //
1266 // sends ML information about the current status of the current detector being processed
1267 //
1268
1269 AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1270
1271 if (!status){
3301427a 1272 Log("SHUTTLE", "SendMLInfo - UNEXPECTED: status could not be read from current CDB entry");
e7f62f16 1273 return;
1274 }
1275
1276 TMonaLisaText mlStatus(Form("%s_status", fCurrentDetector.Data()), status->GetStatusName());
1277 TMonaLisaValue mlRetryCount(Form("%s_count", fCurrentDetector.Data()), status->GetCount());
1278
1279 TList mlList;
1280 mlList.Add(&mlStatus);
1281 mlList.Add(&mlRetryCount);
1282
ee6f7523 1283 TString mlID;
1284 mlID.Form("%d", GetCurrentRun());
1285 fMonaLisa->SendParameters(&mlList, mlID);
e7f62f16 1286}
1287
5164a766 1288//______________________________________________________________________________________________
1289Bool_t AliShuttle::ContinueProcessing()
1290{
9827400b 1291 // this function reads the AliShuttleStatus information from CDB and
1292 // checks if the processing should be continued
1293 // if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
2bb7b766 1294
57c1a579 1295 if (!fConfig->HostProcessDetector(fCurrentDetector)) return kFALSE;
1296
1297 AliPreprocessor* aPreprocessor =
1298 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
1299 if (!aPreprocessor)
1300 {
c88ad5db 1301 Log("SHUTTLE", Form("ContinueProcessing - %s: no preprocessor registered", fCurrentDetector.Data()));
57c1a579 1302 return kFALSE;
1303 }
1304
2bb7b766 1305 AliShuttleLogbookEntry::Status entryStatus =
eba76848 1306 fLogbookEntry->GetDetectorStatus(fCurrentDetector);
2bb7b766 1307
1308 if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
c88ad5db 1309 Log("SHUTTLE", Form("ContinueProcessing - %s is %s",
2bb7b766 1310 fCurrentDetector.Data(),
1311 fLogbookEntry->GetDetectorStatusName(entryStatus)));
1312 return kFALSE;
1313 }
1314
1315 // if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state
be48e3ea 1316
1317 // check if current run is first unprocessed run for current detector
1318 if (fConfig->StrictRunOrder(fCurrentDetector) &&
1319 !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
1320 {
86aa42c3 1321 if (fTestMode == kNone)
1322 {
c88ad5db 1323 Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering"
1324 " but this is not the first unprocessed run!"));
86aa42c3 1325 return kFALSE;
1326 }
1327 else
1328 {
c88ad5db 1329 Log("SHUTTLE", Form("ContinueProcessing - In TESTMODE - "
1330 "Although %s requires strict run ordering "
1331 "and this is not the first unprocessed run, "
1332 "the SHUTTLE continues"));
86aa42c3 1333 }
be48e3ea 1334 }
1335
2bb7b766 1336 AliShuttleStatus* status = ReadShuttleStatus();
1337 if (!status) {
1338 // first time
1339 Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
1340 fCurrentDetector.Data()));
1341 status = new AliShuttleStatus(AliShuttleStatus::kStarted);
1342 return WriteShuttleStatus(status);
1343 }
1344
1345 // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
1346 // If it happens it may mean Logbook updating failed... let's do it now!
1347 if (status->GetStatus() == AliShuttleStatus::kDone ||
1348 status->GetStatus() == AliShuttleStatus::kFailed){
1349 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
1350 fCurrentDetector.Data(),
1351 status->GetStatusName(status->GetStatus())));
1352 UpdateShuttleLogbook(fCurrentDetector.Data(),
1353 status->GetStatusName(status->GetStatus()));
1354 return kFALSE;
1355 }
1356
3301427a 1357 if (status->GetStatus() == AliShuttleStatus::kStoreError) {
2bb7b766 1358 Log("SHUTTLE",
c88ad5db 1359 Form("ContinueProcessing - %s: Grid storage of one or more "
1360 "objects failed. Trying again now",
2bb7b766 1361 fCurrentDetector.Data()));
9827400b 1362 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
1363 if (StoreOCDB()){
c88ad5db 1364 Log("SHUTTLE", Form("ContinueProcessing - %s: all objects "
1365 "successfully stored into main storage",
3301427a 1366 fCurrentDetector.Data()));
2bb7b766 1367 } else {
1368 Log("SHUTTLE",
1369 Form("ContinueProcessing - %s: Grid storage failed again",
1370 fCurrentDetector.Data()));
9827400b 1371 UpdateShuttleStatus(AliShuttleStatus::kStoreError);
2bb7b766 1372 }
1373 return kFALSE;
1374 }
1375
1376 // if we get here, there is a restart
57c1a579 1377 Bool_t cont = kFALSE;
2bb7b766 1378
1379 // abort conditions
cb343cfd 1380 if (status->GetCount() >= fConfig->GetMaxRetries()) {
57c1a579 1381 Log("SHUTTLE", Form("ContinueProcessing - %s failed %d times in status %s - "
1382 "Updating Shuttle Logbook", fCurrentDetector.Data(),
2bb7b766 1383 status->GetCount(), status->GetStatusName()));
1384 UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
e7f62f16 1385 UpdateShuttleStatus(AliShuttleStatus::kFailed);
3301427a 1386
1387 // there may still be objects in local OCDB and reference storage
1388 // and FXS databases may be not updated: do it now!
9827400b 1389
1390 // TODO Currently disabled, we want to keep files in case of failure!
1391 // CleanLocalStorage(fgkLocalCDB);
1392 // CleanLocalStorage(fgkLocalRefStorage);
1393 // UpdateTableFailCase();
1394
1395 // Send mail to detector expert!
c88ad5db 1396 Log("SHUTTLE", Form("ContinueProcessing - Sending mail to %s expert...",
1397 fCurrentDetector.Data()));
9827400b 1398 if (!SendMail())
1399 Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert",
1400 fCurrentDetector.Data()));
3301427a 1401
57c1a579 1402 } else {
1403 Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. "
1404 "Aborted before with %s. Retry number %d.", fCurrentDetector.Data(),
1405 status->GetStatusName(), status->GetCount()));
9827400b 1406 Bool_t increaseCount = kTRUE;
c88ad5db 1407 if (status->GetStatus() == AliShuttleStatus::kDCSError ||
1408 status->GetStatus() == AliShuttleStatus::kDCSStarted)
1409 increaseCount = kFALSE;
675f64cd 1410
9827400b 1411 UpdateShuttleStatus(AliShuttleStatus::kStarted, increaseCount);
57c1a579 1412 cont = kTRUE;
2bb7b766 1413 }
1414
57c1a579 1415 return cont;
5164a766 1416}
1417
1418//______________________________________________________________________________________________
2bb7b766 1419Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
58bc3020 1420{
73abe331 1421 //
b948db8d 1422 // Makes data retrieval for all detectors in the configuration.
2bb7b766 1423 // entry: Shuttle logbook entry, contains run paramenters and status of detectors
1424 // (Unprocessed, Inactive, Failed or Done).
d477ad88 1425 // Returns kFALSE in case of error occured and kTRUE otherwise
73abe331 1426 //
1427
9827400b 1428 if (!entry) return kFALSE;
2bb7b766 1429
1430 fLogbookEntry = entry;
1431
c88ad5db 1432 Log("SHUTTLE", Form("\t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^*",
9827400b 1433 GetCurrentRun()));
2bb7b766 1434
e7f62f16 1435 // Send the information to ML
1436 TMonaLisaText mlStatus("SHUTTLE_status", "Processing");
9827400b 1437 TMonaLisaText mlRunType("SHUTTLE_runtype", Form("%s (%s)", entry->GetRunType(), entry->GetRunParameter("log")));
e7f62f16 1438
1439 TList mlList;
1440 mlList.Add(&mlStatus);
9827400b 1441 mlList.Add(&mlRunType);
e7f62f16 1442
ee6f7523 1443 TString mlID;
1444 mlID.Form("%d", GetCurrentRun());
1445 fMonaLisa->SendParameters(&mlList, mlID);
3301427a 1446
9827400b 1447 if (fLogbookEntry->IsDone())
1448 {
1449 Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook");
1450 UpdateShuttleLogbook("shuttle_done");
1451 fLogbookEntry = 0;
1452 return kTRUE;
1453 }
1454
1455 // read test mode if flag is set
1456 if (fReadTestMode)
1457 {
3d8bc902 1458 fTestMode = kNone;
9827400b 1459 TString logEntry(entry->GetRunParameter("log"));
1460 //printf("log entry = %s\n", logEntry.Data());
1461 TString searchStr("Testmode: ");
1462 Int_t pos = logEntry.Index(searchStr.Data());
1463 //printf("%d\n", pos);
1464 if (pos >= 0)
1465 {
1466 TSubString subStr = logEntry(pos + searchStr.Length(), logEntry.Length());
1467 //printf("%s\n", subStr.String().Data());
1468 TString newStr(subStr.Data());
1469 TObjArray* token = newStr.Tokenize(' ');
1470 if (token)
1471 {
1472 //token->Print();
1473 TObjString* tmpStr = dynamic_cast<TObjString*> (token->First());
1474 if (tmpStr)
1475 {
1476 Int_t testMode = tmpStr->String().Atoi();
1477 if (testMode > 0)
1478 {
c88ad5db 1479 Log("SHUTTLE", Form("Process - Enabling test mode %d", testMode));
9827400b 1480 SetTestMode((TestMode) testMode);
1481 }
1482 }
1483 delete token;
1484 }
1485 }
1486 }
c88ad5db 1487
eba76848 1488 fLogbookEntry->Print("all");
57f50b3c 1489
1490 // Initialization
d477ad88 1491 Bool_t hasError = kFALSE;
5164a766 1492
675f64cd 1493 // Set the CDB and Reference folders according to the year and LHC period
1494 TString lhcPeriod(GetLHCPeriod());
1495 if (lhcPeriod.Length() == 0)
1496 {
7d4cf768 1497 Log("SHUTTLE","Process - LHCPeriod not found in logbook!");
1498 return 0;
675f64cd 1499 }
1500
1501 if (fgkMainCDB.Length() == 0)
1502 fgkMainCDB = Form("alien://folder=/alice/data/%d/%s/OCDB?user=alidaq?cacheFold=/tmp/OCDBCache",
1503 GetCurrentYear(), lhcPeriod.Data());
1504
1505 if (fgkMainRefStorage.Length() == 0)
1506 fgkMainRefStorage = Form("alien://folder=/alice/data/%d/%s/Reference?user=alidaq?cacheFold=/tmp/OCDBCache",
1507 GetCurrentYear(), lhcPeriod.Data());
1508
2bb7b766 1509 AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
1510 if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
1511 AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
1512 if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
d477ad88 1513
57f50b3c 1514 // Loop on detectors in the configuration
b948db8d 1515 TIter iter(fConfig->GetDetectors());
2bb7b766 1516 TObjString* aDetector = 0;
b948db8d 1517
be48e3ea 1518 while ((aDetector = (TObjString*) iter.Next()))
1519 {
7bfb2090 1520 fCurrentDetector = aDetector->String();
5164a766 1521
9e080f92 1522 if (ContinueProcessing() == kFALSE) continue;
1523
c88ad5db 1524 Log("SHUTTLE", Form("\t\t\t****** run %d - %s: START ******",
2bb7b766 1525 GetCurrentRun(), aDetector->GetName()));
1526
9d733021 1527 for(Int_t iSys=0;iSys<3;iSys++) fFXSCalled[iSys]=kFALSE;
1528
c88ad5db 1529 Log(fCurrentDetector.Data(), "Process - Starting processing");
85a80aa9 1530
be48e3ea 1531 Int_t pid = fork();
1532
1533 if (pid < 0)
1534 {
c88ad5db 1535 Log("SHUTTLE", "Process - ERROR: Forking failed");
be48e3ea 1536 }
1537 else if (pid > 0)
1538 {
1539 // parent
c88ad5db 1540 Log("SHUTTLE", Form("Process - In parent process of %d - %s: Starting monitoring",
be48e3ea 1541 GetCurrentRun(), aDetector->GetName()));
1542
1543 Long_t begin = time(0);
1544
1545 int status; // to be used with waitpid, on purpose an int (not Int_t)!
1546 while (waitpid(pid, &status, WNOHANG) == 0)
1547 {
1548 Long_t expiredTime = time(0) - begin;
1549
1550 if (expiredTime > fConfig->GetPPTimeOut())
1551 {
9827400b 1552 TString tmp;
c88ad5db 1553 tmp.Form("Process - Process of %s time out. "
1554 "Run time: %d seconds. Killing...",
1555 fCurrentDetector.Data(), expiredTime);
9827400b 1556 Log("SHUTTLE", tmp);
1557 Log(fCurrentDetector, tmp);
be48e3ea 1558
1559 kill(pid, 9);
1560
3301427a 1561 UpdateShuttleStatus(AliShuttleStatus::kPPTimeOut);
be48e3ea 1562 hasError = kTRUE;
1563
1564 gSystem->Sleep(1000);
1565 }
1566 else
1567 {
be48e3ea 1568 gSystem->Sleep(1000);
9827400b 1569
1570 TString checkStr;
1571 checkStr.Form("ps -o vsize --pid %d | tail -n 1", pid);
1572 FILE* pipe = gSystem->OpenPipe(checkStr, "r");
1573 if (!pipe)
1574 {
c88ad5db 1575 Log("SHUTTLE", Form("Process - Error: "
1576 "Could not open pipe to %s", checkStr.Data()));
9827400b 1577 continue;
1578 }
1579
1580 char buffer[100];
1581 if (!fgets(buffer, 100, pipe))
1582 {
c88ad5db 1583 Log("SHUTTLE", "Process - Error: ps did not return anything");
9827400b 1584 gSystem->ClosePipe(pipe);
1585 continue;
1586 }
1587 gSystem->ClosePipe(pipe);
1588
1589 //Log("SHUTTLE", Form("ps returned %s", buffer));
1590
1591 Int_t mem = 0;
1592 if ((sscanf(buffer, "%d\n", &mem) != 1) || !mem)
1593 {
c88ad5db 1594 Log("SHUTTLE", "Process - Error: Could not parse output of ps");
9827400b 1595 continue;
1596 }
1597
1598 if (expiredTime % 60 == 0)
ee6f7523 1599 {
c88ad5db 1600 Log("SHUTTLE", Form("Process - %s: Checking process. "
1601 "Run time: %d seconds - Memory consumption: %d KB",
1602 fCurrentDetector.Data(), expiredTime, mem));
ee6f7523 1603 SendAlive();
1604 }
9827400b 1605
1606 if (mem > fConfig->GetPPMaxMem())
1607 {
1608 TString tmp;
c88ad5db 1609 tmp.Form("Process - Process exceeds maximum allowed memory "
1610 "(%d KB > %d KB). Killing...",
9827400b 1611 mem, fConfig->GetPPMaxMem());
1612 Log("SHUTTLE", tmp);
1613 Log(fCurrentDetector, tmp);
1614
1615 kill(pid, 9);
1616
1617 UpdateShuttleStatus(AliShuttleStatus::kPPOutOfMemory);
1618 hasError = kTRUE;
1619
1620 gSystem->Sleep(1000);
1621 }
be48e3ea 1622 }
1623 }
1624
c88ad5db 1625 Log("SHUTTLE", Form("Process - In parent process of %d - %s: Client has terminated.",
be48e3ea 1626 GetCurrentRun(), aDetector->GetName()));
1627
1628 if (WIFEXITED(status))
1629 {
1630 Int_t returnCode = WEXITSTATUS(status);
1631
c88ad5db 1632 Log("SHUTTLE", Form("Process - %s: the return code is %d", fCurrentDetector.Data(),
3301427a 1633 returnCode));
be48e3ea 1634
9827400b 1635 if (returnCode == 0) hasError = kTRUE;
be48e3ea 1636 }
1637 }
1638 else if (pid == 0)
1639 {
1640 // client
c88ad5db 1641 Log("SHUTTLE", Form("Process - In client process of %d - %s", GetCurrentRun(),
1642 aDetector->GetName()));
be48e3ea 1643
c88ad5db 1644 Log("SHUTTLE", Form("Process - Redirecting output to %s log",fCurrentDetector.Data()));
ffa29e93 1645
546242fb 1646 if ((freopen(GetLogFileName(fCurrentDetector), "a", stdout)) == 0)
ffa29e93 1647 {
c88ad5db 1648 Log("SHUTTLE", "Process - Could not freopen stdout");
ffa29e93 1649 }
1650 else
1651 {
1652 fOutputRedirected = kTRUE;
1653 if ((dup2(fileno(stdout), fileno(stderr))) < 0)
c88ad5db 1654 Log("SHUTTLE", "Process - Could not redirect stderr");
ffa29e93 1655
1656 }
1657
5bac2bde 1658 TString wd = gSystem->WorkingDirectory();
675f64cd 1659 TString tmpDir = Form("%s/%s_%d_process", GetShuttleTempDir(),
1660 fCurrentDetector.Data(), GetCurrentRun());
5bac2bde 1661
d524ade6 1662 Int_t result = gSystem->GetPathInfo(tmpDir.Data(), 0, (Long64_t*) 0, 0, 0);
1663 if (!result) // temp dir already exists!
1664 {
1665 Log(fCurrentDetector.Data(),
1666 Form("Process - %s dir already exists! Removing...", tmpDir.Data()));
1667 gSystem->Exec(Form("rm -rf %s",tmpDir.Data()));
675f64cd 1668 }
1669
1670 if (gSystem->mkdir(tmpDir.Data(), 1))
1671 {
1672 Log(fCurrentDetector.Data(), "Process - could not make temp directory!!");
1673 gSystem->Exit(1);
d524ade6 1674 }
1675
1676 if (!gSystem->ChangeDirectory(tmpDir.Data()))
1677 {
1678 Log(fCurrentDetector.Data(), "Process - could not change directory!!");
1679 gSystem->Exit(1);
1680 }
5bac2bde 1681
9827400b 1682 Bool_t success = ProcessCurrentDetector();
5bac2bde 1683
1684 gSystem->ChangeDirectory(wd.Data());
b1d18693 1685
9827400b 1686 if (success) // Preprocessor finished successfully!
1687 {
b1d18693 1688 // remove temporary folder
1689 gSystem->Exec(Form("rm -rf %s",tmpDir.Data()));
1690
3301427a 1691 // Update time_processed field in FXS DB
1692 if (UpdateTable() == kFALSE)
5bac2bde 1693 Log("SHUTTLE", Form("Process - %s: Could not update FXS databases!",
1694 fCurrentDetector.Data()));
3301427a 1695
1696 // Transfer the data from local storage to main storage (Grid)
1697 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
1698 if (StoreOCDB() == kFALSE)
1699 {
c88ad5db 1700 Log("SHUTTLE",
1701 Form("\t\t\t****** run %d - %s: STORAGE ERROR ******",
3301427a 1702 GetCurrentRun(), aDetector->GetName()));
1703 UpdateShuttleStatus(AliShuttleStatus::kStoreError);
9827400b 1704 success = kFALSE;
3301427a 1705 } else {
c88ad5db 1706 Log("SHUTTLE",
1707 Form("\t\t\t****** run %d - %s: DONE ******",
3301427a 1708 GetCurrentRun(), aDetector->GetName()));
1709 UpdateShuttleStatus(AliShuttleStatus::kDone);
9827400b 1710 UpdateShuttleLogbook(fCurrentDetector, "DONE");
3301427a 1711 }
c88ad5db 1712 } else
1713 {
1714 Log("SHUTTLE",
1715 Form("\t\t\t****** run %d - %s: PP ERROR ******",
1716 GetCurrentRun(), aDetector->GetName()));
be48e3ea 1717 }
1718
4b95672b 1719 for (UInt_t iSys=0; iSys<3; iSys++)
1720 {
1721 if (fFXSCalled[iSys]) fFXSlist[iSys].Clear();
1722 }
1723
c88ad5db 1724 Log("SHUTTLE", Form("Process - Client process of %d - %s is exiting now with %d.",
9827400b 1725 GetCurrentRun(), aDetector->GetName(), success));
be48e3ea 1726
1727 // the client exits here
9827400b 1728 gSystem->Exit(success);
be48e3ea 1729
1730 AliError("We should never get here!!!");
1731 }
7bfb2090 1732 }
5164a766 1733
c88ad5db 1734 Log("SHUTTLE", Form("\t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^*",
2bb7b766 1735 GetCurrentRun()));
1736
1737 //check if shuttle is done for this run, if so update logbook
1738 TObjArray checkEntryArray;
1739 checkEntryArray.SetOwner(1);
9e080f92 1740 TString whereClause = Form("where run=%d", GetCurrentRun());
b0e53b15 1741 if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) ||
1742 checkEntryArray.GetEntries() == 0) {
9e080f92 1743 Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!",
1744 GetCurrentRun()));
1745 return hasError == kFALSE;
1746 }
b948db8d 1747
9e080f92 1748 AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*>
1749 (checkEntryArray.At(0));
2bb7b766 1750
9e080f92 1751 if (checkEntry)
1752 {
1753 if (checkEntry->IsDone())
be48e3ea 1754 {
9e080f92 1755 Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook");
1756 UpdateShuttleLogbook("shuttle_done");
1757 }
1758 else
1759 {
1760 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
be48e3ea 1761 {
9e080f92 1762 if (checkEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
be48e3ea 1763 {
9e080f92 1764 AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
1765 checkEntry->GetRun(), GetDetName(iDet)));
1766 fFirstUnprocessed[iDet] = kFALSE;
be48e3ea 1767 }
1768 }
2bb7b766 1769 }
1770 }
1771
1772 fLogbookEntry = 0;
85a80aa9 1773
a7160fe9 1774 return hasError == kFALSE;
73abe331 1775}
1776
b948db8d 1777//______________________________________________________________________________________________
9827400b 1778Bool_t AliShuttle::ProcessCurrentDetector()
73abe331 1779{
1780 //
2bb7b766 1781 // Makes data retrieval just for a specific detector (fCurrentDetector).
73abe331 1782 // Threre should be a configuration for this detector.
73abe331 1783
1d172743 1784 Log("SHUTTLE", Form("ProcessCurrentDetector - Retrieving values for %s, run %d",
1785 fCurrentDetector.Data(), GetCurrentRun()));
73abe331 1786
d524ade6 1787 TString wd = gSystem->WorkingDirectory();
1788
2d9019b4 1789 if (!CleanReferenceStorage(fCurrentDetector.Data()))
546242fb 1790 return kFALSE;
d524ade6 1791
1792 gSystem->ChangeDirectory(wd.Data());
1793
1d172743 1794 TMap* dcsMap = new TMap();
3301427a 1795
1796 // call preprocessor
1797 AliPreprocessor* aPreprocessor =
1798 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
1799
1800 aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
1801
1802 Bool_t processDCS = aPreprocessor->ProcessDCS();
d477ad88 1803
651fdaab 1804 if (!processDCS)
1805 {
1d172743 1806 Log(fCurrentDetector, "ProcessCurrentDetector -"
1807 " The preprocessor requested to skip the retrieval of DCS values");
651fdaab 1808 }
8b739301 1809 else if (fTestMode & kSkipDCS)
2c15234c 1810 {
1d172743 1811 Log(fCurrentDetector, "ProcessCurrentDetector - In TESTMODE: Skipping DCS processing");
9827400b 1812 }
1813 else if (fTestMode & kErrorDCS)
1814 {
1d172743 1815 Log(fCurrentDetector, "ProcessCurrentDetector - In TESTMODE: Simulating DCS error");
3d8bc902 1816 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
9827400b 1817 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1d172743 1818 delete dcsMap;
9827400b 1819 return kFALSE;
2c15234c 1820 } else {
3301427a 1821
1822 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
1823
1d172743 1824 // Query DCS archive
1825 Int_t nServers = fConfig->GetNServers(fCurrentDetector);
a038aa70 1826
1d172743 1827 for (int iServ=0; iServ<nServers; iServ++)
2c15234c 1828 {
1d172743 1829
1830 TString host(fConfig->GetDCSHost(fCurrentDetector, iServ));
1831 Int_t port = fConfig->GetDCSPort(fCurrentDetector, iServ);
542b6cc8 1832 Int_t multiSplit = fConfig->GetMultiSplit(fCurrentDetector, iServ);
1833
1790d4b7 1834 Log(fCurrentDetector, Form("ProcessCurrentDetector -"
1835 " Querying DCS Amanda server %s:%d (%d of %d)",
1836 host.Data(), port, iServ+1, nServers));
1d172743 1837
1838 TMap* aliasMap = 0;
1839 TMap* dpMap = 0;
1840
1841 if (fConfig->GetDCSAliases(fCurrentDetector, iServ)->GetEntries() > 0)
2c15234c 1842 {
1d172743 1843 aliasMap = GetValueSet(host, port,
542b6cc8 1844 fConfig->GetDCSAliases(fCurrentDetector, iServ),
1845 kAlias, multiSplit);
1d172743 1846 if (!aliasMap)
1847 {
1848 Log(fCurrentDetector,
1849 Form("ProcessCurrentDetector -"
675f64cd 1850 " Error retrieving DCS aliases from server %s."
1851 " Sending mail to DCS experts!", host.Data()));
1d172743 1852 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
675f64cd 1853
b5f1c82e 1854 //if (!SendMailToDCS())
1855 // Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!"));
675f64cd 1856
a038aa70 1857 delete dcsMap;
1d172743 1858 return kFALSE;
1859 }
2c15234c 1860 }
a038aa70 1861
1d172743 1862 if (fConfig->GetDCSDataPoints(fCurrentDetector, iServ)->GetEntries() > 0)
a038aa70 1863 {
1d172743 1864 dpMap = GetValueSet(host, port,
542b6cc8 1865 fConfig->GetDCSDataPoints(fCurrentDetector, iServ),
1866 kDP, multiSplit);
1d172743 1867 if (!dpMap)
1868 {
1869 Log(fCurrentDetector,
1870 Form("ProcessCurrentDetector -"
675f64cd 1871 " Error retrieving DCS data points from server %s."
1872 " Sending mail to DCS experts!", host.Data()));
1d172743 1873 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
675f64cd 1874
b5f1c82e 1875 //if (!SendMailToDCS())
1876 // Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!"));
675f64cd 1877
1d172743 1878 if (aliasMap) delete aliasMap;
1879 delete dcsMap;
1880 return kFALSE;
1881 }
a038aa70 1882 }
1d172743 1883
1884 // merge aliasMap and dpMap into dcsMap
1885 if(aliasMap) {
1886 TIter iter(aliasMap);
a038aa70 1887 TObjString* key = 0;
1888 while ((key = (TObjString*) iter.Next()))
1d172743 1889 dcsMap->Add(key, aliasMap->GetValue(key->String()));
1890
1891 aliasMap->SetOwner(kFALSE);
1892 delete aliasMap;
1893 }
1894
1895 if(dpMap) {
1896 TIter iter(dpMap);
1897 TObjString* key = 0;
1898 while ((key = (TObjString*) iter.Next()))
1899 dcsMap->Add(key, dpMap->GetValue(key->String()));
1900
1901 dpMap->SetOwner(kFALSE);
1902 delete dpMap;
a038aa70 1903 }
73abe331 1904 }
1905 }
dc25836b 1906
b1d18693 1907 // save map into file, to help debugging in case of preprocessor error
1908 TFile* f = TFile::Open("DCSMap.root","recreate");
1909 f->cd();
1910 dcsMap->Write("DCSMap", TObject::kSingleKey);
1911 f->Close();
1912 delete f;
1913
2bb7b766 1914 // DCS Archive DB processing successful. Call Preprocessor!
85a80aa9 1915 UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
a7160fe9 1916
a038aa70 1917 UInt_t returnValue = aPreprocessor->Process(dcsMap);
b948db8d 1918
3301427a 1919 if (returnValue > 0) // Preprocessor error!
1920 {
c88ad5db 1921 Log(fCurrentDetector, Form("ProcessCurrentDetector - "
1922 "Preprocessor failed. Process returned %d.", returnValue));
cb343cfd 1923 UpdateShuttleStatus(AliShuttleStatus::kPPError);
a038aa70 1924 dcsMap->DeleteAll();
1925 delete dcsMap;
9827400b 1926 return kFALSE;
1927 }
1928
1929 // preprocessor ok!
1930 UpdateShuttleStatus(AliShuttleStatus::kPPDone);
1931 Log(fCurrentDetector, Form("ProcessCurrentDetector - %s preprocessor returned success",
1932 fCurrentDetector.Data()));
b948db8d 1933
a038aa70 1934 dcsMap->DeleteAll();
1935 delete dcsMap;
b948db8d 1936
9827400b 1937 return kTRUE;
2bb7b766 1938}
1939
1940//______________________________________________________________________________________________
1941Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
1942 TObjArray& entries)
1943{
9827400b 1944 // Query DAQ's Shuttle logbook and fills detector status object.
1945 // Call QueryRunParameters to query DAQ logbook for run parameters.
1946 //
2bb7b766 1947
fc5a4708 1948 entries.SetOwner(1);
1949
2bb7b766 1950 // check connection, in case connect
be48e3ea 1951 if(!Connect(3)) return kFALSE;
2bb7b766 1952
1953 TString sqlQuery;
441b0e9c 1954 sqlQuery = Form("select * from %s %s order by run", fConfig->GetShuttlelbTable(), whereClause);
2bb7b766 1955
be48e3ea 1956 TSQLResult* aResult = fServer[3]->Query(sqlQuery);
2bb7b766 1957 if (!aResult) {
1958 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
1959 return kFALSE;
1960 }
1961
fc5a4708 1962 AliDebug(2,Form("Query = %s", sqlQuery.Data()));
1963
2bb7b766 1964 if(aResult->GetRowCount() == 0) {
c88ad5db 1965 Log("SHUTTLE", "No entries in Shuttle Logbook match request");
9827400b 1966 delete aResult;
1967 return kTRUE;
2bb7b766 1968 }
1969
1970 // TODO Check field count!
db99d43e 1971 const UInt_t nCols = 23;
2bb7b766 1972 if (aResult->GetFieldCount() != (Int_t) nCols) {
c88ad5db 1973 Log("SHUTTLE", "Invalid SQL result field number!");
2bb7b766 1974 delete aResult;
1975 return kFALSE;
1976 }
1977
2bb7b766 1978 TSQLRow* aRow;
1979 while ((aRow = aResult->Next())) {
1980 TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
1981 Int_t run = runString.Atoi();
1982
eba76848 1983 AliShuttleLogbookEntry *entry = QueryRunParameters(run);
1984 if (!entry)
1985 continue;
2bb7b766 1986
1987 // loop on detectors
eba76848 1988 for(UInt_t ii = 0; ii < nCols; ii++)
1989 entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii));
2bb7b766 1990
eba76848 1991 entries.AddLast(entry);
2bb7b766 1992 delete aRow;
1993 }
1994
2bb7b766 1995 delete aResult;
1996 return kTRUE;
1997}
1998
1999//______________________________________________________________________________________________
eba76848 2000AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
2bb7b766 2001{
eba76848 2002 //
2003 // Retrieve run parameters written in the DAQ logbook and sets them into AliShuttleLogbookEntry object
2004 //
2bb7b766 2005
2006 // check connection, in case connect
be48e3ea 2007 if (!Connect(3))
eba76848 2008 return 0;
2bb7b766 2009
2010 TString sqlQuery;
2c15234c 2011 sqlQuery.Form("select * from %s where run=%d", fConfig->GetDAQlbTable(), run);
2bb7b766 2012
be48e3ea 2013 TSQLResult* aResult = fServer[3]->Query(sqlQuery);
2bb7b766 2014 if (!aResult) {
c88ad5db 2015 Log("SHUTTLE", Form("Can't execute query <%s>!", sqlQuery.Data()));
eba76848 2016 return 0;
2bb7b766 2017 }
2018
eba76848 2019 if (aResult->GetRowCount() == 0) {
2bb7b766 2020 Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run));
2021 delete aResult;
eba76848 2022 return 0;
2bb7b766 2023 }
2024
eba76848 2025 if (aResult->GetRowCount() > 1) {
c88ad5db 2026 Log("SHUTTLE", Form("QueryRunParameters - UNEXPECTED: "
2027 "more than one entry in DAQ Logbook for run %d!", run));
2bb7b766 2028 delete aResult;
eba76848 2029 return 0;
2bb7b766 2030 }
2031
eba76848 2032 TSQLRow* aRow = aResult->Next();
2033 if (!aRow)
2034 {
c88ad5db 2035 Log("SHUTTLE", Form("QueryRunParameters - Could not retrieve row for run %d. Skipping", run));
eba76848 2036 delete aResult;
2037 return 0;
2038 }
2bb7b766 2039
eba76848 2040 AliShuttleLogbookEntry* entry = new AliShuttleLogbookEntry(run);
2bb7b766 2041
eba76848 2042 for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
2043 entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
2bb7b766 2044
eba76848 2045 UInt_t startTime = entry->GetStartTime();
2046 UInt_t endTime = entry->GetEndTime();
2047
1abfbb60 2048// if (!startTime || !endTime || startTime > endTime)
2049// {
2050// Log("SHUTTLE",
2051// Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d. Skipping!",
2052// run, startTime, endTime));
2053//
2054// Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
2055// fLogbookEntry = entry;
2056// if (!UpdateShuttleLogbook("shuttle_done"))
2057// {
2058// AliError(Form("Could not update logbook for run %d !", run));
2059// }
2060// fLogbookEntry = 0;
2061//
2062// delete entry;
2063// delete aRow;
2064// delete aResult;
2065// return 0;
2066// }
2067
2068 if (!startTime)
b0e53b15 2069 {
2070 Log("SHUTTLE",
1abfbb60 2071 Form("QueryRunParameters - Invalid parameters for Run %d: "
2072 "startTime = %d, endTime = %d. Skipping!",
2073 run, startTime, endTime));
b0e53b15 2074
2075 Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
2076 fLogbookEntry = entry;
ee6f7523 2077 if (!UpdateShuttleLogbook("shuttle_ignored"))
b0e53b15 2078 {
2079 AliError(Form("Could not update logbook for run %d !", run));
2080 }
2081 fLogbookEntry = 0;
2082
2083 delete entry;
2084 delete aRow;
2085 delete aResult;
2086 return 0;
2087 }
2088
1abfbb60 2089 if (startTime && !endTime)
2090 {
2091 // TODO Here we don't mark SHUTTLE done, because this may mean
2092 //the run is still ongoing!!
2093 Log("SHUTTLE",
2094 Form("QueryRunParameters - Invalid parameters for Run %d: "
2095 "startTime = %d, endTime = %d. Skipping (Shuttle won't be marked as DONE)!",
2096 run, startTime, endTime));
2097
2098 //Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
2099 //fLogbookEntry = entry;
2100 //if (!UpdateShuttleLogbook("shuttle_done"))
2101 //{
2102 // AliError(Form("Could not update logbook for run %d !", run));
2103 //}
2104 //fLogbookEntry = 0;
2105
2106 delete entry;
2107 delete aRow;
2108 delete aResult;
2109 return 0;
2110 }
2111
2112 if (startTime && endTime && (startTime > endTime))
2113 {
2114 Log("SHUTTLE",
2115 Form("QueryRunParameters - Invalid parameters for Run %d: "
2116 "startTime = %d, endTime = %d. Skipping!",
2117 run, startTime, endTime));
2118
2119 Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
2120 fLogbookEntry = entry;
ee6f7523 2121 if (!UpdateShuttleLogbook("shuttle_ignored"))
1abfbb60 2122 {
2123 AliError(Form("Could not update logbook for run %d !", run));
2124 }
2125 fLogbookEntry = 0;
2126
2127 delete entry;
2128 delete aRow;
2129 delete aResult;
2130 return 0;
2131 }
2132
b0e53b15 2133 TString totEventsStr = entry->GetRunParameter("totalEvents");
2134 Int_t totEvents = totEventsStr.Atoi();
2135 if (totEvents < 1)
2136 {
eba76848 2137 Log("SHUTTLE",
b0e53b15 2138 Form("QueryRunParameters - Run %d has 0 events - Skipping!", run));
2139
2140 Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
2141 fLogbookEntry = entry;
97e3c167 2142 if (!UpdateShuttleLogbook("shuttle_ignored"))
b0e53b15 2143 {
2144 AliError(Form("Could not update logbook for run %d !", run));
2145 }
2146 fLogbookEntry = 0;
2147
eba76848 2148 delete entry;
2bb7b766 2149 delete aRow;
eba76848 2150 delete aResult;
2151 return 0;
2bb7b766 2152 }
2153
eba76848 2154 delete aRow;
2bb7b766 2155 delete aResult;
eba76848 2156
2157 return entry;
2bb7b766 2158}
2159
a038aa70 2160//______________________________________________________________________________________________
2161TMap* AliShuttle::GetValueSet(const char* host, Int_t port, const TSeqCollection* entries,
542b6cc8 2162 DCSType type, Int_t multiSplit)
a038aa70 2163{
2164 // Retrieve all "entry" data points from the DCS server
2165 // host, port: TSocket connection parameters
2166 // entries: list of name of the alias or data point
2167 // type: kAlias or kDP
2168 // returns TMap of values, 0 when failure
542b6cc8 2169
2170 AliDCSClient client(host, port, fTimeout, fRetries, multiSplit);
b41b252a 2171
a038aa70 2172 TMap* result = 0;
b41b252a 2173 if (type == kAlias)
a038aa70 2174 {
b41b252a 2175 result = client.GetAliasValues(entries, GetCurrentStartTime(),
2176 GetCurrentEndTime());
2177 }
2178 else if (type == kDP)
2179 {
2180 result = client.GetDPValues(entries, GetCurrentStartTime(),
2181 GetCurrentEndTime());
2182 }
a038aa70 2183
b41b252a 2184 if (result == 0)
2185 {
2186 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get entries! Reason: %s",
1790d4b7 2187 client.GetErrorString(client.GetResultErrorCode())));
2188 if (client.GetResultErrorCode() == AliDCSClient::fgkServerError)
2189 Log(fCurrentDetector.Data(), Form("GetValueSet - Server error code: %s",
2190 client.GetServerError().Data()));
a038aa70 2191
b41b252a 2192 return 0;
a038aa70 2193 }
b41b252a 2194
a038aa70 2195 return result;
2196}
b41b252a 2197
b948db8d 2198//______________________________________________________________________________________________
57f50b3c 2199const char* AliShuttle::GetFile(Int_t system, const char* detector,
2200 const char* id, const char* source)
b948db8d 2201{
9827400b 2202 // Get calibration file from file exchange servers
2203 // First queris the FXS database for the file name, using the run, detector, id and source info
2204 // then calls RetrieveFile(filename) for actual copy to local disk
2205 // run: current run being processed (given by Logbook entry fLogbookEntry)
2206 // detector: the Preprocessor name
2207 // id: provided as a parameter by the Preprocessor
2208 // source: provided by the Preprocessor through GetFileSources function
2209
2210 // check if test mode should simulate a FXS error
2211 if (fTestMode & kErrorFXSFiles)
2212 {
2213 Log(detector, Form("GetFile - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2214 return 0;
2215 }
2216
57f50b3c 2217 // check connection, in case connect
9d733021 2218 if (!Connect(system))
eba76848 2219 {
9d733021 2220 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
57f50b3c 2221 return 0;
2222 }
2223
2224 // Query preparation
9d733021 2225 TString sourceName(source);
d386d623 2226 Int_t nFields = 3;
2227 TString sqlQueryStart = Form("select filePath,size,fileChecksum from %s where",
2228 fConfig->GetFXSdbTable(system));
2229 TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
2230 GetCurrentRun(), detector, id);
2231
9d733021 2232 if (system == kDAQ)
2233 {
d386d623 2234 whereClause += Form(" and DAQsource=\"%s\"", source);
57f50b3c 2235 }
9d733021 2236 else if (system == kDCS)
eba76848 2237 {
9d733021 2238 sourceName="none";
57f50b3c 2239 }
9d733021 2240 else if (system == kHLT)
9e080f92 2241 {
d386d623 2242 whereClause += Form(" and DDLnumbers=\"%s\"", source);
9d733021 2243 nFields = 3;
9e080f92 2244 }
2245
9e080f92 2246 TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2247
2248 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2249
2250 // Query execution
2251 TSQLResult* aResult = 0;
9d733021 2252 aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
9e080f92 2253 if (!aResult) {
9d733021 2254 Log(detector, Form("GetFileName - Can't execute SQL query to %s database for: id = %s, source = %s",
2255 GetSystemName(system), id, sourceName.Data()));
9e080f92 2256 return 0;
2257 }
2258
2259 if(aResult->GetRowCount() == 0)
2260 {
2261 Log(detector,
9d733021 2262 Form("GetFileName - No entry in %s FXS db for: id = %s, source = %s",
2263 GetSystemName(system), id, sourceName.Data()));
9e080f92 2264 delete aResult;
2265 return 0;
2266 }
2bb7b766 2267
9e080f92 2268 if (aResult->GetRowCount() > 1) {
2269 Log(detector,
9d733021 2270 Form("GetFileName - More than one entry in %s FXS db for: id = %s, source = %s",
2271 GetSystemName(system), id, sourceName.Data()));
9e080f92 2272 delete aResult;
2273 return 0;
2274 }
2275
9d733021 2276 if (aResult->GetFieldCount() != nFields) {
9e080f92 2277 Log(detector,
9d733021 2278 Form("GetFileName - Wrong field count in %s FXS db for: id = %s, source = %s",
2279 GetSystemName(system), id, sourceName.Data()));
9e080f92 2280 delete aResult;
2281 return 0;
2282 }
2283
2284 TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
2285
2286 if (!aRow){
9d733021 2287 Log(detector, Form("GetFileName - Empty set result in %s FXS db from query: id = %s, source = %s",
2288 GetSystemName(system), id, sourceName.Data()));
9e080f92 2289 delete aResult;
2290 return 0;
2291 }
2292
2293 TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
2294 TString fileSize(aRow->GetField(1), aRow->GetFieldLength(1));
d386d623 2295 TString fileChecksum(aRow->GetField(2), aRow->GetFieldLength(2));
9e080f92 2296
2297 delete aResult;
2298 delete aRow;
2299
d386d623 2300 AliDebug(2, Form("filePath = %s; size = %s, fileChecksum = %s",
2301 filePath.Data(), fileSize.Data(), fileChecksum.Data()));
9e080f92 2302
9e080f92 2303 // retrieved file is renamed to make it unique
675f64cd 2304 TString localFileName = Form("%s/%s_%d_process/%s_%s_%d_%s_%s.shuttle",
2305 GetShuttleTempDir(), detector, GetCurrentRun(),
d524ade6 2306 GetSystemName(system), detector, GetCurrentRun(),
2307 id, sourceName.Data());
9d733021 2308
9e080f92 2309
9d733021 2310 // file retrieval from FXS
4b95672b 2311 UInt_t nRetries = 0;
2312 UInt_t maxRetries = 3;
2313 Bool_t result = kFALSE;
2314
2315 // copy!! if successful TSystem::Exec returns 0
2316 while(nRetries++ < maxRetries) {
2317 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
2318 result = RetrieveFile(system, filePath.Data(), localFileName.Data());
2319 if(!result)
2320 {
2321 Log(detector, Form("GetFileName - Copy of file %s from %s FXS failed",
9d733021 2322 filePath.Data(), GetSystemName(system)));
4b95672b 2323 continue;
4f0749a8 2324 }
9e080f92 2325
d386d623 2326 if (fileChecksum.Length()>0)
4b95672b 2327 {
2328 // compare md5sum of local file with the one stored in the FXS DB
d524ade6 2329 Int_t md5Comp = gSystem->Exec(Form("md5sum %s |grep %s 2>&1 > /dev/null",
2330 localFileName.Data(), fileChecksum.Data()));
9e080f92 2331
4b95672b 2332 if (md5Comp != 0)
2333 {
2334 Log(detector, Form("GetFileName - md5sum of file %s does not match with local copy!",
2335 filePath.Data()));
2336 result = kFALSE;
2337 continue;
2338 }
d386d623 2339 } else {
2340 Log(fCurrentDetector, Form("GetFile - md5sum of file %s not set in %s database, skipping comparison",
2341 filePath.Data(), GetSystemName(system)));
9d733021 2342 }
4b95672b 2343 if (result) break;
9e080f92 2344 }
2345
4b95672b 2346 if(!result) return 0;
2347
9d733021 2348 fFXSCalled[system]=kTRUE;
2349 TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, sourceName.Data()));
2350 fFXSlist[system].Add(fileParams);
9e080f92 2351
675f64cd 2352 static TString staticLocalFileName;
2353 staticLocalFileName.Form("%s", localFileName.Data());
2354
c88ad5db 2355 Log(fCurrentDetector, Form("GetFile - Retrieved file with id %s and "
2356 "source %s from %s to %s", id, source,
d524ade6 2357 GetSystemName(system), localFileName.Data()));
675f64cd 2358
d524ade6 2359 return staticLocalFileName.Data();
2bb7b766 2360}
2361
2362//______________________________________________________________________________________________
9d733021 2363Bool_t AliShuttle::RetrieveFile(UInt_t system, const char* fxsFileName, const char* localFileName)
9e080f92 2364{
9827400b 2365 //
2366 // Copies file from FXS to local Shuttle machine
2367 //
2bb7b766 2368
9e080f92 2369 // check temp directory: trying to cd to temp; if it does not exist, create it
d524ade6 2370 AliDebug(2, Form("Copy file %s from %s FXS into %s",
2371 GetSystemName(system), fxsFileName, localFileName));
2372
2373 TString tmpDir(localFileName);
2374
2375 tmpDir = tmpDir(0,tmpDir.Last('/'));
9e080f92 2376
d524ade6 2377 Int_t noDir = gSystem->GetPathInfo(tmpDir.Data(), 0, (Long64_t*) 0, 0, 0);
2378 if (noDir) // temp dir does not exists!
2379 {
2380 if (gSystem->mkdir(tmpDir.Data(), 1))
2381 {
2382 Log(fCurrentDetector.Data(), "RetrieveFile - could not make temp directory!!");
9e080f92 2383 return kFALSE;
2384 }
9e080f92 2385 }
2386
9d733021 2387 TString baseFXSFolder;
2388 if (system == kDAQ)
2389 {
2390 baseFXSFolder = "FES/";
2391 }
2392 else if (system == kDCS)
2393 {
2394 baseFXSFolder = "";
2395 }
2396 else if (system == kHLT)
2397 {
42fde080 2398 baseFXSFolder = "/opt/FXS/";
9d733021 2399 }
2400
2401
d524ade6 2402 TString command = Form("scp -oPort=%d -2 %s@%s:%s%s %s",
9d733021 2403 fConfig->GetFXSPort(system),
2404 fConfig->GetFXSUser(system),
2405 fConfig->GetFXSHost(system),
2406 baseFXSFolder.Data(),
2407 fxsFileName,
9e080f92 2408 localFileName);
2409
2410 AliDebug(2, Form("%s",command.Data()));
2411
4b95672b 2412 Bool_t result = (gSystem->Exec(command.Data()) == 0);
9e080f92 2413
4b95672b 2414 return result;
9e080f92 2415}
2416
2417//______________________________________________________________________________________________
9d733021 2418TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
2419{
9827400b 2420 //
2421 // Get sources producing the condition file Id from file exchange servers
4a33bdd9 2422 // if id is NULL all sources are returned (distinct)
9827400b 2423 //
1bcd28db 2424
2425 Log(detector, Form("GetFileSources - Retrieving sources with id %s from %s", id, GetSystemName(system)));
9827400b 2426
2427 // check if test mode should simulate a FXS error
2428 if (fTestMode & kErrorFXSSources)
2429 {
2430 Log(detector, Form("GetFileSources - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2431 return 0;
2432 }
2433
9d733021 2434 if (system == kDCS)
2435 {
c88ad5db 2436 Log(detector, "GetFileSources - WARNING: DCS system has only one source of data!");
6297b37d 2437 TList *list = new TList();
2438 list->SetOwner(1);
2439 list->Add(new TObjString(" "));
2440 return list;
9d733021 2441 }
9e080f92 2442
2443 // check connection, in case connect
9d733021 2444 if (!Connect(system))
2445 {
4a33bdd9 2446 Log(detector, Form("GetFileSources - Couldn't connect to %s FXS database", GetSystemName(system)));
9d733021 2447 return NULL;
9e080f92 2448 }
2449
9d733021 2450 TString sourceName = 0;
2451 if (system == kDAQ)
2452 {
2453 sourceName = "DAQsource";
2454 } else if (system == kHLT)
2455 {
2456 sourceName = "DDLnumbers";
2457 }
2458
4a33bdd9 2459 TString sqlQueryStart = Form("select distinct %s from %s where", sourceName.Data(), fConfig->GetFXSdbTable(system));
2460 TString whereClause = Form("run=%d and detector=\"%s\"",
2461 GetCurrentRun(), detector);
2462 if (id)
2463 whereClause += Form(" and fileId=\"%s\"", id);
9e080f92 2464 TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2465
2466 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2467
2468 // Query execution
2469 TSQLResult* aResult;
9d733021 2470 aResult = fServer[system]->Query(sqlQuery);
9e080f92 2471 if (!aResult) {
9d733021 2472 Log(detector, Form("GetFileSources - Can't execute SQL query to %s database for id: %s",
2473 GetSystemName(system), id));
9e080f92 2474 return 0;
2475 }
2476
86aa42c3 2477 TList *list = new TList();
2478 list->SetOwner(1);
2479
9d733021 2480 if (aResult->GetRowCount() == 0)
2481 {
9e080f92 2482 Log(detector,
9d733021 2483 Form("GetFileSources - No entry in %s FXS table for id: %s", GetSystemName(system), id));
9e080f92 2484 delete aResult;
86aa42c3 2485 return list;
9e080f92 2486 }
2487
1bcd28db 2488 Log(detector, Form("GetFileSources - Found %d sources", aResult->GetRowCount()));
9e080f92 2489
1bcd28db 2490 TSQLRow* aRow;
9d733021 2491 while ((aRow = aResult->Next()))
2492 {
9e080f92 2493
9d733021 2494 TString source(aRow->GetField(0), aRow->GetFieldLength(0));
2495 AliDebug(2, Form("%s = %s", sourceName.Data(), source.Data()));
2496 list->Add(new TObjString(source));
9e080f92 2497 delete aRow;
2498 }
9d733021 2499
9e080f92 2500 delete aResult;
2501
2502 return list;
2bb7b766 2503}
2504
4a33bdd9 2505//______________________________________________________________________________________________
2506TList* AliShuttle::GetFileIDs(Int_t system, const char* detector, const char* source)
2507{
2508 //
2509 // Get all ids of condition files produced by a given source from file exchange servers
2510 //
2511
1bcd28db 2512 Log(detector, Form("GetFileIDs - Retrieving ids with source %s with %s", source, GetSystemName(system)));
2513
4a33bdd9 2514 // check if test mode should simulate a FXS error
2515 if (fTestMode & kErrorFXSSources)
2516 {
2517 Log(detector, Form("GetFileIDs - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2518 return 0;
2519 }
2520
2521 // check connection, in case connect
2522 if (!Connect(system))
2523 {
2524 Log(detector, Form("GetFileIDs - Couldn't connect to %s FXS database", GetSystemName(system)));
2525 return NULL;
2526 }
2527
2528 TString sourceName = 0;
2529 if (system == kDAQ)
2530 {
2531 sourceName = "DAQsource";
2532 } else if (system == kHLT)
2533 {
2534 sourceName = "DDLnumbers";
2535 }
2536
2537 TString sqlQueryStart = Form("select fileId from %s where", fConfig->GetFXSdbTable(system));
2538 TString whereClause = Form("run=%d and detector=\"%s\"",
2539 GetCurrentRun(), detector);
2540 if (sourceName.Length() > 0 && source)
2541 whereClause += Form(" and %s=\"%s\"", sourceName.Data(), source);
2542 TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2543
2544 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2545
2546 // Query execution
2547 TSQLResult* aResult;
2548 aResult = fServer[system]->Query(sqlQuery);
2549 if (!aResult) {
2550 Log(detector, Form("GetFileIDs - Can't execute SQL query to %s database for source: %s",
2551 GetSystemName(system), source));
2552 return 0;
2553 }
2554
2555 TList *list = new TList();
2556 list->SetOwner(1);
2557
2558 if (aResult->GetRowCount() == 0)
2559 {
2560 Log(detector,
2561 Form("GetFileIDs - No entry in %s FXS table for source: %s", GetSystemName(system), source));
2562 delete aResult;
2563 return list;
2564 }
2565
1bcd28db 2566 Log(detector, Form("GetFileIDs - Found %d ids", aResult->GetRowCount()));
2567
4a33bdd9 2568 TSQLRow* aRow;
2569
2570 while ((aRow = aResult->Next()))
2571 {
2572
2573 TString id(aRow->GetField(0), aRow->GetFieldLength(0));
2574 AliDebug(2, Form("fileId = %s", id.Data()));
2575 list->Add(new TObjString(id));
2576 delete aRow;
2577 }
2578
2579 delete aResult;
2580
2581 return list;
2582}
2583
2bb7b766 2584//______________________________________________________________________________________________
9d733021 2585Bool_t AliShuttle::Connect(Int_t system)
2bb7b766 2586{
9827400b 2587 // Connect to MySQL Server of the system's FXS MySQL databases
2588 // DAQ Logbook, Shuttle Logbook and DAQ FXS db are on the same host
2589 //
57f50b3c 2590
9d733021 2591 // check connection: if already connected return
2592 if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
57f50b3c 2593
9d733021 2594 TString dbHost, dbUser, dbPass, dbName;
57f50b3c 2595
9d733021 2596 if (system < 3) // FXS db servers
2597 {
2598 dbHost = Form("mysql://%s:%d", fConfig->GetFXSdbHost(system), fConfig->GetFXSdbPort(system));
2599 dbUser = fConfig->GetFXSdbUser(system);
2600 dbPass = fConfig->GetFXSdbPass(system);
2601 dbName = fConfig->GetFXSdbName(system);
2602 } else { // Run & Shuttle logbook servers
2603 // TODO Will the Shuttle logbook server be the same as the Run logbook server ???
2604 dbHost = Form("mysql://%s:%d", fConfig->GetDAQlbHost(), fConfig->GetDAQlbPort());
2605 dbUser = fConfig->GetDAQlbUser();
2606 dbPass = fConfig->GetDAQlbPass();
2607 dbName = fConfig->GetDAQlbDB();
2608 }
57f50b3c 2609
9d733021 2610 fServer[system] = TSQLServer::Connect(dbHost.Data(), dbUser.Data(), dbPass.Data());
2611 if (!fServer[system] || !fServer[system]->IsConnected()) {
2612 if(system < 3)
2613 {
2614 AliError(Form("Can't establish connection to FXS database for %s",
2615 AliShuttleInterface::GetSystemName(system)));
2616 } else {
2617 AliError("Can't establish connection to Run logbook.");
57f50b3c 2618 }
9d733021 2619 if(fServer[system]) delete fServer[system];
2620 return kFALSE;
2bb7b766 2621 }
57f50b3c 2622
9d733021 2623 // Get tables
2624 TSQLResult* aResult=0;
2625 switch(system){
2626 case kDAQ:
2627 aResult = fServer[kDAQ]->GetTables(dbName.Data());
2628 break;
2629 case kDCS:
2630 aResult = fServer[kDCS]->GetTables(dbName.Data());
2631 break;
2632 case kHLT:
2633 aResult = fServer[kHLT]->GetTables(dbName.Data());
2634 break;
2635 default:
2636 aResult = fServer[3]->GetTables(dbName.Data());
2637 break;
2638 }
2639
2640 delete aResult;
2bb7b766 2641 return kTRUE;
2642}
57f50b3c 2643
9e080f92 2644//______________________________________________________________________________________________
9d733021 2645Bool_t AliShuttle::UpdateTable()
9e080f92 2646{
9827400b 2647 //
2648 // Update FXS table filling time_processed field in all rows corresponding to current run and detector
2649 //
9e080f92 2650
9d733021 2651 Bool_t result = kTRUE;
9e080f92 2652
9d733021 2653 for (UInt_t system=0; system<3; system++)
2654 {
2655 if(!fFXSCalled[system]) continue;
9e080f92 2656
9d733021 2657 // check connection, in case connect
2658 if (!Connect(system))
2659 {
2660 Log(fCurrentDetector, Form("UpdateTable - Couldn't connect to %s FXS database", GetSystemName(system)));
2661 result = kFALSE;
2662 continue;
9e080f92 2663 }
9e080f92 2664
9d733021 2665 TTimeStamp now; // now
2666
2667 // Loop on FXS list entries
2668 TIter iter(&fFXSlist[system]);
2669 TObjString *aFXSentry=0;
2670 while ((aFXSentry = dynamic_cast<TObjString*> (iter.Next())))
2671 {
2672 TString aFXSentrystr = aFXSentry->String();
2673 TObjArray *aFXSarray = aFXSentrystr.Tokenize("#!?!#");
2674 if (!aFXSarray || aFXSarray->GetEntries() != 2 )
2675 {
2676 Log(fCurrentDetector, Form("UpdateTable - error updating %s FXS entry. Check string: <%s>",
2677 GetSystemName(system), aFXSentrystr.Data()));
2678 if(aFXSarray) delete aFXSarray;
2679 result = kFALSE;
2680 continue;
2681 }
2682 const char* fileId = ((TObjString*) aFXSarray->At(0))->GetName();
2683 const char* source = ((TObjString*) aFXSarray->At(1))->GetName();
2684
2685 TString whereClause;
2686 if (system == kDAQ)
2687 {
2688 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
2689 GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
2690 }
2691 else if (system == kDCS)
2692 {
2693 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\";",
2694 GetCurrentRun(), fCurrentDetector.Data(), fileId);
2695 }
2696 else if (system == kHLT)
2697 {
2698 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\";",
2699 GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
2700 }
2701
2702 delete aFXSarray;
9e080f92 2703
9d733021 2704 TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
2705 now.GetSec(), whereClause.Data());
9e080f92 2706
9d733021 2707 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
9e080f92 2708
9d733021 2709 // Query execution
2710 TSQLResult* aResult;
2711 aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2712 if (!aResult)
2713 {
2714 Log(fCurrentDetector, Form("UpdateTable - %s db: can't execute SQL query <%s>",
2715 GetSystemName(system), sqlQuery.Data()));
2716 result = kFALSE;
2717 continue;
2718 }
2719 delete aResult;
9e080f92 2720 }
9e080f92 2721 }
2722
9d733021 2723 return result;
9e080f92 2724}
57f50b3c 2725
3301427a 2726//______________________________________________________________________________________________
2727Bool_t AliShuttle::UpdateTableFailCase()
2728{
9827400b 2729 // Update FXS table filling time_processed field in all rows corresponding to current run and detector
2730 // this is called in case the preprocessor is declared failed for the current run, because
2731 // the fields are updated only in case of success
3301427a 2732
2733 Bool_t result = kTRUE;
2734
2735 for (UInt_t system=0; system<3; system++)
2736 {
2737 // check connection, in case connect
2738 if (!Connect(system))
2739 {
2740 Log(fCurrentDetector, Form("UpdateTableFailCase - Couldn't connect to %s FXS database",
2741 GetSystemName(system)));
2742 result = kFALSE;
2743 continue;
2744 }
2745
2746 TTimeStamp now; // now
2747
2748 // Loop on FXS list entries
2749
2750 TString whereClause = Form("where run=%d and detector=\"%s\";",
2751 GetCurrentRun(), fCurrentDetector.Data());
2752
2753
2754 TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
2755 now.GetSec(), whereClause.Data());
2756
2757 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2758
2759 // Query execution
2760 TSQLResult* aResult;
2761 aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2762 if (!aResult)
2763 {
2764 Log(fCurrentDetector, Form("UpdateTableFailCase - %s db: can't execute SQL query <%s>",
2765 GetSystemName(system), sqlQuery.Data()));
2766 result = kFALSE;
2767 continue;
2768 }
2769 delete aResult;
2770 }
2771
2772 return result;
2773}
2774
2bb7b766 2775//______________________________________________________________________________________________
2776Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
2777{
e7f62f16 2778 //
2779 // Update Shuttle logbook filling detector or shuttle_done column
2780 // ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
2781 //
57f50b3c 2782
2bb7b766 2783 // check connection, in case connect
be48e3ea 2784 if(!Connect(3)){
2bb7b766 2785 Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook.");
2786 return kFALSE;
57f50b3c 2787 }
2788
2bb7b766 2789 TString detName(detector);
2790 TString setClause;
ee6f7523 2791 if (detName == "shuttle_done" || detName == "shuttle_ignored")
e7f62f16 2792 {
2bb7b766 2793 setClause = "set shuttle_done=1";
e7f62f16 2794
ee6f7523 2795 if (detName == "shuttle_done")
b0e53b15 2796 {
2797 // Send the information to ML
2798 TMonaLisaText mlStatus("SHUTTLE_status", "Done");
e7f62f16 2799
b0e53b15 2800 TList mlList;
2801 mlList.Add(&mlStatus);
2802
ee6f7523 2803 TString mlID;
2804 mlID.Form("%d", GetCurrentRun());
2805 fMonaLisa->SendParameters(&mlList, mlID);
b0e53b15 2806 }
2bb7b766 2807 } else {
2bb7b766 2808 TString statusStr(status);
2809 if(statusStr.Contains("done", TString::kIgnoreCase) ||
2810 statusStr.Contains("failed", TString::kIgnoreCase)){
eba76848 2811 setClause = Form("set %s=\"%s\"", detector, status);
2bb7b766 2812 } else {
2813 Log("SHUTTLE",
2814 Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s",
2815 status, detector));
2816 return kFALSE;
2817 }
2818 }
57f50b3c 2819
2bb7b766 2820 TString whereClause = Form("where run=%d", GetCurrentRun());
2821
441b0e9c 2822 TString sqlQuery = Form("update %s %s %s",
2823 fConfig->GetShuttlelbTable(), setClause.Data(), whereClause.Data());
57f50b3c 2824
2bb7b766 2825 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2826
2827 // Query execution
2828 TSQLResult* aResult;
be48e3ea 2829 aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
2bb7b766 2830 if (!aResult) {
2831 Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data()));
2832 return kFALSE;
57f50b3c 2833 }
2bb7b766 2834 delete aResult;
57f50b3c 2835
2836 return kTRUE;
2837}
2838
2839//______________________________________________________________________________________________
2bb7b766 2840Int_t AliShuttle::GetCurrentRun() const
2841{
9827400b 2842 //
2843 // Get current run from logbook entry
2844 //
57f50b3c 2845
2bb7b766 2846 return fLogbookEntry ? fLogbookEntry->GetRun() : -1;
57f50b3c 2847}
2848
2849//______________________________________________________________________________________________
2bb7b766 2850UInt_t AliShuttle::GetCurrentStartTime() const
2851{
9827400b 2852 //
2853 // get current start time
2854 //
57f50b3c 2855
2bb7b766 2856 return fLogbookEntry ? fLogbookEntry->GetStartTime() : 0;
57f50b3c 2857}
2858
2859//______________________________________________________________________________________________
2bb7b766 2860UInt_t AliShuttle::GetCurrentEndTime() const
2861{
9827400b 2862 //
2863 // get current end time from logbook entry
2864 //
57f50b3c 2865
2bb7b766 2866 return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
57f50b3c 2867}
2868
675f64cd 2869//______________________________________________________________________________________________
2870UInt_t AliShuttle::GetCurrentYear() const
2871{
2872 //
2873 // Get current year from logbook entry
2874 //
2875
2876 if (!fLogbookEntry) return 0;
2877
2878 TTimeStamp startTime(GetCurrentStartTime());
2879 TString year = Form("%d",startTime.GetDate());
2880 year = year(0,4);
2881
2882 return year.Atoi();
2883}
2884
2885//______________________________________________________________________________________________
2886const char* AliShuttle::GetLHCPeriod() const
2887{
2888 //
2889 // Get current LHC period from logbook entry
2890 //
2891
2892 if (!fLogbookEntry) return 0;
2893
2894 return fLogbookEntry->GetRunParameter("LHCperiod");
2895}
2896
b948db8d 2897//______________________________________________________________________________________________
2898void AliShuttle::Log(const char* detector, const char* message)
2899{
9827400b 2900 //
2901 // Fill log string with a message
2902 //
b948db8d 2903
7d4cf768 2904 TString logRunDir = GetShuttleLogDir();
2905 if (GetCurrentRun() >=0)
2906 logRunDir += Form("/%d", GetCurrentRun());
2907
2908 void* dir = gSystem->OpenDirectory(logRunDir.Data());
84090f85 2909 if (dir == NULL) {
7d4cf768 2910 if (gSystem->mkdir(logRunDir.Data(), kTRUE)) {
36c99a6a 2911 AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
84090f85 2912 return;
2913 }
b948db8d 2914
84090f85 2915 } else {
2916 gSystem->FreeDirectory(dir);
2917 }
b948db8d 2918
cb343cfd 2919 TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
e7f62f16 2920 if (GetCurrentRun() >= 0)
2921 toLog += Form("run %d - ", GetCurrentRun());
2bb7b766 2922 toLog += Form("%s", message);
2923
84090f85 2924 AliInfo(toLog.Data());
ffa29e93 2925
2926 // if we redirect the log output already to the file, leave here
2927 if (fOutputRedirected && strcmp(detector, "SHUTTLE") != 0)
2928 return;
b948db8d 2929
ffa29e93 2930 TString fileName = GetLogFileName(detector);
e7f62f16 2931
84090f85 2932 gSystem->ExpandPathName(fileName);
2933
2934 ofstream logFile;
2935 logFile.open(fileName, ofstream::out | ofstream::app);
2936
2937 if (!logFile.is_open()) {
2938 AliError(Form("Could not open file %s", fileName.Data()));
2939 return;
2940 }
7bfb2090 2941
84090f85 2942 logFile << toLog.Data() << "\n";
b948db8d 2943
84090f85 2944 logFile.close();
b948db8d 2945}
2bb7b766 2946
ffa29e93 2947//______________________________________________________________________________________________
2948TString AliShuttle::GetLogFileName(const char* detector) const
2949{
2950 //
2951 // returns the name of the log file for a given sub detector
2952 //
2953
2954 TString fileName;
2955
2956 if (GetCurrentRun() >= 0)
7d4cf768 2957 {
2958 fileName.Form("%s/%d/%s_%d.log", GetShuttleLogDir(), GetCurrentRun(),
2959 detector, GetCurrentRun());
2960 } else {
ffa29e93 2961 fileName.Form("%s/%s.log", GetShuttleLogDir(), detector);
7d4cf768 2962 }
ffa29e93 2963
2964 return fileName;
2965}
2966
ee6f7523 2967//______________________________________________________________________________________________
2968void AliShuttle::SendAlive()
2969{
2970 // sends alive message to ML
2971
2972 TMonaLisaText mlStatus("SHUTTLE_status", "Alive");
2973
2974 TList mlList;
2975 mlList.Add(&mlStatus);
2976
2977 fMonaLisa->SendParameters(&mlList, "__PROCESSINGINFO__");
2978}
2979
2bb7b766 2980//______________________________________________________________________________________________
2981Bool_t AliShuttle::Collect(Int_t run)
2982{
9827400b 2983 //
2984 // Collects conditions data for all UNPROCESSED run written to DAQ LogBook in case of run = -1 (default)
2985 // If a dedicated run is given this run is processed
2986 //
2987 // In operational mode, this is the Shuttle function triggered by the EOR signal.
2988 //
2bb7b766 2989
eba76848 2990 if (run == -1)
2991 Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs");
2992 else
2993 Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run));
cb343cfd 2994
2995 SetLastAction("Starting");
2bb7b766 2996
ee6f7523 2997 // create ML instance
2998 if (!fMonaLisa)
2999 fMonaLisa = new TMonaLisaWriter(fConfig->GetMonitorHost(), fConfig->GetMonitorTable());
3000
3001
3002 SendAlive();
3003
2bb7b766 3004 TString whereClause("where shuttle_done=0");
eba76848 3005 if (run != -1)
3006 whereClause += Form(" and run=%d", run);
2bb7b766 3007
3008 TObjArray shuttleLogbookEntries;
be48e3ea 3009 if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries))
3010 {
cb343cfd 3011 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
2bb7b766 3012 return kFALSE;
3013 }
3014
9e080f92 3015 if (shuttleLogbookEntries.GetEntries() == 0)
3016 {
3017 if (run == -1)
3018 Log("SHUTTLE","Collect - Found no UNPROCESSED runs in Shuttle logbook");
3019 else
3020 Log("SHUTTLE", Form("Collect - Run %d is already DONE "
3021 "or it does not exist in Shuttle logbook", run));
3022 return kTRUE;
3023 }
3024
be48e3ea 3025 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
3026 fFirstUnprocessed[iDet] = kTRUE;
3027
fc5a4708 3028 if (run != -1)
be48e3ea 3029 {
3030 // query Shuttle logbook for earlier runs, check if some detectors are unprocessed,
3031 // flag them into fFirstUnprocessed array
3032 TString whereClause(Form("where shuttle_done=0 and run < %d", run));
3033 TObjArray tmpLogbookEntries;
3034 if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries))
3035 {
3036 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
3037 return kFALSE;
3038 }
3039
3040 TIter iter(&tmpLogbookEntries);
3041 AliShuttleLogbookEntry* anEntry = 0;
3042 while ((anEntry = dynamic_cast<AliShuttleLogbookEntry*> (iter.Next())))
3043 {
3044 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
3045 {
3046 if (anEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
3047 {
3048 AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
3049 anEntry->GetRun(), GetDetName(iDet)));
3050 fFirstUnprocessed[iDet] = kFALSE;
3051 }
3052 }
3053
3054 }
3055
3056 }
3057
3058 if (!RetrieveConditionsData(shuttleLogbookEntries))
3059 {
cb343cfd 3060 Log("SHUTTLE", "Collect - Process of at least one run failed");
2bb7b766 3061 return kFALSE;
3062 }
3063
36c99a6a 3064 Log("SHUTTLE", "Collect - Requested run(s) successfully processed");
eba76848 3065 return kTRUE;
2bb7b766 3066}
3067
2bb7b766 3068//______________________________________________________________________________________________
3069Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries)
3070{
9827400b 3071 //
3072 // Retrieve conditions data for all runs that aren't processed yet
3073 //
2bb7b766 3074
3075 Bool_t hasError = kFALSE;
3076
3077 TIter iter(&dateEntries);
3078 AliShuttleLogbookEntry* anEntry;
3079
3080 while ((anEntry = (AliShuttleLogbookEntry*) iter.Next())){
3081 if (!Process(anEntry)){
3082 hasError = kTRUE;
3083 }
4b95672b 3084
3085 // clean SHUTTLE temp directory
d524ade6 3086 //TString filename = Form("%s/*.shuttle", GetShuttleTempDir());
3087 //RemoveFile(filename.Data());
2bb7b766 3088 }
3089
3090 return hasError == kFALSE;
3091}
cb343cfd 3092
3093//______________________________________________________________________________________________
3094ULong_t AliShuttle::GetTimeOfLastAction() const
3095{
9827400b 3096 //
3097 // Gets time of last action
3098 //
3099
cb343cfd 3100 ULong_t tmp;
36c99a6a 3101
cb343cfd 3102 fMonitoringMutex->Lock();
be48e3ea 3103
cb343cfd 3104 tmp = fLastActionTime;
36c99a6a 3105
cb343cfd 3106 fMonitoringMutex->UnLock();
36c99a6a 3107
cb343cfd 3108 return tmp;
3109}
3110
3111//______________________________________________________________________________________________
3112const TString AliShuttle::GetLastAction() const
3113{
9827400b 3114 //
cb343cfd 3115 // returns a string description of the last action
9827400b 3116 //
cb343cfd 3117
3118 TString tmp;
36c99a6a 3119
cb343cfd 3120 fMonitoringMutex->Lock();
3121
3122 tmp = fLastAction;
3123
3124 fMonitoringMutex->UnLock();
3125
36c99a6a 3126 return tmp;
cb343cfd 3127}
3128
3129//______________________________________________________________________________________________
3130void AliShuttle::SetLastAction(const char* action)
3131{
9827400b 3132 //
cb343cfd 3133 // updates the monitoring variables
9827400b 3134 //
36c99a6a 3135
cb343cfd 3136 fMonitoringMutex->Lock();
36c99a6a 3137
cb343cfd 3138 fLastAction = action;
3139 fLastActionTime = time(0);
3140
3141 fMonitoringMutex->UnLock();
3142}
eba76848 3143
3144//______________________________________________________________________________________________
3145const char* AliShuttle::GetRunParameter(const char* param)
3146{
9827400b 3147 //
3148 // returns run parameter read from DAQ logbook
3149 //
eba76848 3150
3151 if(!fLogbookEntry) {
3152 AliError("No logbook entry!");
3153 return 0;
3154 }
3155
3156 return fLogbookEntry->GetRunParameter(param);
3157}
57c1a579 3158
d386d623 3159//______________________________________________________________________________________________
9827400b 3160AliCDBEntry* AliShuttle::GetFromOCDB(const char* detector, const AliCDBPath& path)
d386d623 3161{
9827400b 3162 //
3163 // returns object from OCDB valid for current run
3164 //
d386d623 3165
9827400b 3166 if (fTestMode & kErrorOCDB)
3167 {
3168 Log(detector, "GetFromOCDB - In TESTMODE - Simulating error with OCDB");
3169 return 0;
3170 }
3171
d386d623 3172 AliCDBStorage *sto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
3173 if (!sto)
3174 {
9827400b 3175 Log(detector, "GetFromOCDB - Cannot activate main OCDB for query!");
d386d623 3176 return 0;
3177 }
3178
3179 return dynamic_cast<AliCDBEntry*> (sto->Get(path, GetCurrentRun()));
3180}
3181
57c1a579 3182//______________________________________________________________________________________________
3183Bool_t AliShuttle::SendMail()
3184{
9827400b 3185 //
3186 // sends a mail to the subdetector expert in case of preprocessor error
3187 //
3188
3189 if (fTestMode != kNone)
3190 return kTRUE;
57c1a579 3191
36c99a6a 3192 void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
57c1a579 3193 if (dir == NULL)
3194 {
36c99a6a 3195 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
57c1a579 3196 {
675f64cd 3197 Log("SHUTTLE", Form("SendMail - Can't open directory <%s>", GetShuttleLogDir()));
57c1a579 3198 return kFALSE;
3199 }
3200
3201 } else {
3202 gSystem->FreeDirectory(dir);
3203 }
3204
3205 TString bodyFileName;
36c99a6a 3206 bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
57c1a579 3207 gSystem->ExpandPathName(bodyFileName);
3208
3209 ofstream mailBody;
3210 mailBody.open(bodyFileName, ofstream::out);
3211
3212 if (!mailBody.is_open())
3213 {
675f64cd 3214 Log("SHUTTLE", Form("Could not open mail body file %s", bodyFileName.Data()));
57c1a579 3215 return kFALSE;
3216 }
3217
3218 TString to="";
3219 TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
3220 TObjString *anExpert=0;
3221 while ((anExpert = (TObjString*) iterExperts.Next()))
3222 {
3223 to += Form("%s,", anExpert->GetName());
3224 }
3225 to.Remove(to.Length()-1);
909732f7 3226 AliDebug(2, Form("to: %s",to.Data()));
57c1a579 3227
86aa42c3 3228 if (to.IsNull()) {
c88ad5db 3229 Log("SHUTTLE", "List of detector responsibles not yet set!");
36c99a6a 3230 return kFALSE;
3231 }
3232
57c1a579 3233 TString cc="alberto.colla@cern.ch";
3234
6a1146c4 3235 TString subject = Form("%s Shuttle preprocessor FAILED in run %d (run type = %s)!",
3236 fCurrentDetector.Data(), GetCurrentRun(), GetRunType());
909732f7 3237 AliDebug(2, Form("subject: %s", subject.Data()));
57c1a579 3238
3239 TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
3240 body += Form("SHUTTLE just detected that your preprocessor "
6a1146c4 3241 "failed processing run %d (run type = %s)!!\n\n",
3242 GetCurrentRun(), GetRunType());
7d4cf768 3243 body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n",
3244 fCurrentDetector.Data());
b0e53b15 3245 if (fConfig->GetRunMode() == AliShuttleConfig::kTest)
3246 {
3247 body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
3248 } else {
3249 body += Form("\thttp://pcalimonitor.cern.ch/shuttle.jsp?instance=PROD?time=168 \n\n");
3250 }
3251
7d4cf768 3252
3253 TString logFolder = "logs";
3254 if (fConfig->GetRunMode() == AliShuttleConfig::kProd)
3255 logFolder += "_PROD";
3256
3257
546242fb 3258 body += Form("Find the %s log for the current run on \n\n"
7d4cf768 3259 "\thttp://pcalishuttle01.cern.ch:8880/%s/%d/%s_%d.log \n\n",
3260 fCurrentDetector.Data(), logFolder.Data(), GetCurrentRun(),
3261 fCurrentDetector.Data(), GetCurrentRun());
3262 body += Form("The last 10 lines of %s log file are following:\n\n", fCurrentDetector.Data());
57c1a579 3263
909732f7 3264 AliDebug(2, Form("Body begin: %s", body.Data()));
57c1a579 3265
3266 mailBody << body.Data();
3267 mailBody.close();
3268 mailBody.open(bodyFileName, ofstream::out | ofstream::app);
3269
7d4cf768 3270 TString logFileName = Form("%s/%d/%s_%d.log", GetShuttleLogDir(),
3271 GetCurrentRun(), fCurrentDetector.Data(), GetCurrentRun());
57c1a579 3272 TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
3273 if (gSystem->Exec(tailCommand.Data()))
3274 {
3275 mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
3276 }
3277
3278 TString endBody = Form("------------------------------------------------------\n\n");
675f64cd 3279 endBody += Form("In case of problems please contact the SHUTTLE core team.\n\n");
3280 endBody += "Please do not answer this message directly, it is automatically generated.\n\n";
3281 endBody += "Greetings,\n\n \t\t\tthe SHUTTLE\n";
3282
3283 AliDebug(2, Form("Body end: %s", endBody.Data()));
3284
3285 mailBody << endBody.Data();
3286
3287 mailBody.close();
3288
3289 // send mail!
3290 TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
3291 subject.Data(),
3292 cc.Data(),
3293 to.Data(),
3294 bodyFileName.Data());
3295 AliDebug(2, Form("mail command: %s", mailCommand.Data()));
3296
3297 Bool_t result = gSystem->Exec(mailCommand.Data());
3298
3299 return result == 0;
3300}
3301
3302//______________________________________________________________________________________________
3303Bool_t AliShuttle::SendMailToDCS()
3304{
3305 //
3306 // sends a mail to the DCS experts in case of DCS error
3307 //
3308
3309 if (fTestMode != kNone)
3310 return kTRUE;
3311
3312 void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
3313 if (dir == NULL)
3314 {
3315 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
3316 {
3317 Log("SHUTTLE", Form("SendMailToDCS - Can't open directory <%s>", GetShuttleLogDir()));
3318 return kFALSE;
3319 }
3320
3321 } else {
3322 gSystem->FreeDirectory(dir);
3323 }
3324
3325 TString bodyFileName;
3326 bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
3327 gSystem->ExpandPathName(bodyFileName);
3328
3329 ofstream mailBody;
3330 mailBody.open(bodyFileName, ofstream::out);
3331
3332 if (!mailBody.is_open())
3333 {
3334 Log("SHUTTLE", Form("SendMailToDCS - Could not open mail body file %s", bodyFileName.Data()));
3335 return kFALSE;
3336 }
3337
3338 TString to="Vladimir.Fekete@cern.ch, Svetozar.Kapusta@cern.ch";
3339 //TString to="alberto.colla@cern.ch";
3340 AliDebug(2, Form("to: %s",to.Data()));
3341
3342 if (to.IsNull()) {
3343 Log("SHUTTLE", "List of detector responsibles not yet set!");
3344 return kFALSE;
3345 }
3346
3347 TString cc="alberto.colla@cern.ch";
3348
3349 TString subject = Form("Retrieval of data points for %s FAILED in run %d !",
3350 fCurrentDetector.Data(), GetCurrentRun());
3351 AliDebug(2, Form("subject: %s", subject.Data()));
3352
3353 TString body = Form("Dear DCS experts, \n\n");
3354 body += Form("SHUTTLE couldn\'t retrieve the data points for detector %s "
3355 "in run %d!!\n\n", fCurrentDetector.Data(), GetCurrentRun());
7d4cf768 3356 body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n",
3357 fCurrentDetector.Data());
b0e53b15 3358 if (fConfig->GetRunMode() == AliShuttleConfig::kTest)
3359 {
3360 body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
3361 } else {
3362 body += Form("\thttp://pcalimonitor.cern.ch/shuttle.jsp?instance=PROD?time=168 \n\n");
3363 }
7d4cf768 3364
3365 TString logFolder = "logs";
3366 if (fConfig->GetRunMode() == AliShuttleConfig::kProd)
3367 logFolder += "_PROD";
3368
3369
675f64cd 3370 body += Form("Find the %s log for the current run on \n\n"
7d4cf768 3371 "\thttp://pcalishuttle01.cern.ch:8880/%s/%d/%s_%d.log \n\n",
3372 fCurrentDetector.Data(), logFolder.Data(), GetCurrentRun(),
3373 fCurrentDetector.Data(), GetCurrentRun());
3374 body += Form("The last 10 lines of %s log file are following:\n\n", fCurrentDetector.Data());
675f64cd 3375
3376 AliDebug(2, Form("Body begin: %s", body.Data()));
3377
3378 mailBody << body.Data();
3379 mailBody.close();
3380 mailBody.open(bodyFileName, ofstream::out | ofstream::app);
3381
7d4cf768 3382 TString logFileName = Form("%s/%d/%s_%d.log", GetShuttleLogDir(), GetCurrentRun(),
3383 fCurrentDetector.Data(), GetCurrentRun());
675f64cd 3384 TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
3385 if (gSystem->Exec(tailCommand.Data()))
3386 {
3387 mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
3388 }
3389
3390 TString endBody = Form("------------------------------------------------------\n\n");
36c99a6a 3391 endBody += Form("In case of problems please contact the SHUTTLE core team.\n\n");
3392 endBody += "Please do not answer this message directly, it is automatically generated.\n\n";
546242fb 3393 endBody += "Greetings,\n\n \t\t\tthe SHUTTLE\n";
57c1a579 3394
909732f7 3395 AliDebug(2, Form("Body end: %s", endBody.Data()));
57c1a579 3396
3397 mailBody << endBody.Data();
3398
3399 mailBody.close();
3400
3401 // send mail!
3402 TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
3403 subject.Data(),
3404 cc.Data(),
3405 to.Data(),
3406 bodyFileName.Data());
909732f7 3407 AliDebug(2, Form("mail command: %s", mailCommand.Data()));
57c1a579 3408
3409 Bool_t result = gSystem->Exec(mailCommand.Data());
3410
3411 return result == 0;
3412}
d386d623 3413
441b0e9c 3414//______________________________________________________________________________________________
9827400b 3415const char* AliShuttle::GetRunType()
441b0e9c 3416{
9827400b 3417 //
3418 // returns run type read from "run type" logbook
3419 //
441b0e9c 3420
3421 if(!fLogbookEntry) {
3422 AliError("No logbook entry!");
3423 return 0;
3424 }
3425
9827400b 3426 return fLogbookEntry->GetRunType();
441b0e9c 3427}
3428
4859271b 3429//______________________________________________________________________________________________
3430Bool_t AliShuttle::GetHLTStatus()
3431{
3432 // Return HLT status (ON=1 OFF=0)
3433 // Converts the HLT status from the status string read in the run logbook (not just a bool)
3434
3435 if(!fLogbookEntry) {
3436 AliError("No logbook entry!");
3437 return 0;
3438 }
3439
3440 // TODO implement when HLTStatus is inserted in run logbook
3441 //TString hltStatus = fLogbookEntry->GetRunParameter("HLTStatus");
3442 //if(hltStatus == "OFF") {return kFALSE};
3443
3444 return kTRUE;
3445}
3446
d386d623 3447//______________________________________________________________________________________________
3448void AliShuttle::SetShuttleTempDir(const char* tmpDir)
3449{
9827400b 3450 //
3451 // sets Shuttle temp directory
3452 //
d386d623 3453
3454 fgkShuttleTempDir = gSystem->ExpandPathName(tmpDir);
3455}
3456
3457//______________________________________________________________________________________________
3458void AliShuttle::SetShuttleLogDir(const char* logDir)
3459{
9827400b 3460 //
3461 // sets Shuttle log directory
3462 //
d386d623 3463
3464 fgkShuttleLogDir = gSystem->ExpandPathName(logDir);
3465}