Example file with CTP counters data. To be read by the GRP preprocessor (Roman)
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttle.cxx
CommitLineData
73abe331 1/**************************************************************************
2 * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
3 * *
4 * Author: The ALICE Off-line Project. *
5 * Contributors are mentioned in the code where appropriate. *
6 * *
7 * Permission to use, copy, modify and distribute this software and its *
8 * documentation strictly for non-commercial purposes is hereby granted *
9 * without fee, provided that the above copyright notice appears in all *
10 * copies and that both the copyright notice and this permission notice *
11 * appear in the supporting documentation. The authors make no claims *
12 * about the suitability of this software for any purpose. It is *
13 * provided "as is" without express or implied warranty. *
14 **************************************************************************/
15
16/*
17$Log$
b0e53b15 18Revision 1.67 2007/12/07 19:14:36 acolla
19in AliShuttleTrigger:
20
21Added automatic collection of new runs on a regular time basis (settable from the configuration)
22
23in AliShuttleConfig: new members
24
25- triggerWait: time to wait for DIM trigger (s) before starting automatic collection of new runs
26- mode: run mode (test, prod) -> used to build log folder (logs or logs_PROD)
27
28in AliShuttle:
29
30- logs now stored in logs/#RUN/DET_#RUN.log
31
7d4cf768 32Revision 1.66 2007/12/05 10:45:19 jgrosseo
33changed order of arguments to TMonaLisaWriter
34
468df1ce 35Revision 1.65 2007/11/26 16:58:37 acolla
36Monalisa configuration added: host and table name
37
b832ec02 38Revision 1.64 2007/11/13 16:15:47 acolla
39DCS map is stored in a file in the temp folder where the detector is processed.
40If the preprocessor fails, the temp folder is not removed. This will help the debugging of the problem.
41
b1d18693 42Revision 1.63 2007/11/02 10:53:16 acolla
43Protection added to AliShuttle::CopyFileLocally
44
7d43a416 45Revision 1.62 2007/10/31 18:23:13 acolla
46Furter developement on the Shuttle:
47
48- Shuttle now connects to the Grid as alidaq. The OCDB and Reference folders
49are now built from /alice/data, e.g.:
50/alice/data/2007/LHC07a/OCDB
51
52the year and LHC period are taken from the Shuttle.
53Raw metadata files are stored by GRP to:
54/alice/data/2007/LHC07a/<runNb>/Raw/RunMetadata.root
55
56- Shuttle sends a mail to DCS experts each time DP retrieval fails.
57
675f64cd 58Revision 1.61 2007/10/30 20:33:51 acolla
59Improved managing of temporary folders, which weren't correctly handled.
60Resolved bug introduced in StoreReferenceFile, which caused SPD preprocessor fail.
61
d524ade6 62Revision 1.60 2007/10/29 18:06:16 acolla
63
64New function StoreRunMetadataFile added to preprocessor and Shuttle interface
65This function can be used by GRP only. It stores raw data tags merged file to the
66raw data folder (e.g. /alice/data/2008/LHC08a/000099999/Raw).
67
68KNOWN ISSUES:
69
701. Shuttle cannot write to /alice/data/ because it belongs to alidaq. Tag file is stored in /alice/simulation/... for the time being.
712. Due to a bug in TAlien::Mkdir, the creation of a folder in recursive mode (-p option) does not work. The problem
72has been corrected in the root package on the Shuttle machine.
73
c88ad5db 74Revision 1.59 2007/10/05 12:40:55 acolla
75
76Result error code added to AliDCSClient data members (it was "lost" with the new implementation of TMap* GetAliasValues and GetDPValues).
77
1790d4b7 78Revision 1.58 2007/09/28 15:27:40 acolla
79
80AliDCSClient "multiSplit" option added in the DCS configuration
81in AliDCSMessage: variable MAX_BODY_SIZE set to 500000
82
542b6cc8 83Revision 1.57 2007/09/27 16:53:13 acolla
84Detectors can have more than one AMANDA server. SHUTTLE queries the servers sequentially,
85merges the dcs aliases/DPs in one TMap and sends it to the preprocessor.
86
1d172743 87Revision 1.56 2007/09/14 16:46:14 jgrosseo
881) Connect and Close are called before and after each query, so one can
89keep the same AliDCSClient object.
902) The splitting of a query is moved to GetDPValues/GetAliasValues.
913) Splitting interval can be specified in constructor
92
b41b252a 93Revision 1.55 2007/08/06 12:26:40 acolla
94Function Bool_t GetHLTStatus added to preprocessor. It returns the status of HLT
95read from the run logbook.
96
4859271b 97Revision 1.54 2007/07/12 09:51:25 jgrosseo
98removed duplicated log message in GetFile
99
4f0749a8 100Revision 1.53 2007/07/12 09:26:28 jgrosseo
101updating hlt fxs base path
102
42fde080 103Revision 1.52 2007/07/12 08:06:45 jgrosseo
104adding log messages in getfile... functions
105adding not implemented copy constructor in alishuttleconfigholder
106
1bcd28db 107Revision 1.51 2007/07/03 17:24:52 acolla
108root moved to v5-16-00. TFileMerger->Cp moved to TFile::Cp.
109
a986b218 110Revision 1.50 2007/07/02 17:19:32 acolla
111preprocessor is run in a temp directory that is removed when process is finished.
112
5bac2bde 113Revision 1.49 2007/06/29 10:45:06 acolla
114Number of columns in MySql Shuttle logbook increased by one (HLT added)
115
db99d43e 116Revision 1.48 2007/06/21 13:06:19 acolla
117GetFileSources returns dummy list with 1 source if system=DCS (better than
118returning error as it was)
119
6297b37d 120Revision 1.47 2007/06/19 17:28:56 acolla
121HLT updated; missing map bug removed.
122
dc25836b 123Revision 1.46 2007/06/09 13:01:09 jgrosseo
124Switching to retrieval of several DCS DPs at a time (multiDPrequest)
125
a038aa70 126Revision 1.45 2007/05/30 06:35:20 jgrosseo
127Adding functionality to the Shuttle/TestShuttle:
128o) Function to retrieve list of sources from a given system (GetFileSources with id=0)
129o) Function to retrieve list of IDs for a given source (GetFileIDs)
130These functions are needed for dealing with the tag files that are saved for the GRP preprocessor
131Example code has been added to the TestProcessor in TestShuttle
132
4a33bdd9 133Revision 1.44 2007/05/11 16:09:32 acolla
134Reference files for ITS, MUON and PHOS are now stored in OfflineDetName/OnlineDetName/run_...
135example: ITS/SPD/100_filename.root
136
2d9019b4 137Revision 1.43 2007/05/10 09:59:51 acolla
138Various bug fixes in StoreRefFilesToGrid; Cleaning of reference storage before processing detector (CleanReferenceStorage)
139
546242fb 140Revision 1.42 2007/05/03 08:01:39 jgrosseo
141typo in last commit :-(
142
8b739301 143Revision 1.41 2007/05/03 08:00:48 jgrosseo
144fixing log message when pp want to skip dcs value retrieval
145
651fdaab 146Revision 1.40 2007/04/27 07:06:48 jgrosseo
147GetFileSources returns empty list in case of no files, but successful query
148No mails sent in testmode
149
86aa42c3 150Revision 1.39 2007/04/17 12:43:57 acolla
151Correction in StoreOCDB; change of text in mail to detector expert
152
26758fce 153Revision 1.38 2007/04/12 08:26:18 jgrosseo
154updated comment
155
3c2a21c8 156Revision 1.37 2007/04/10 16:53:14 jgrosseo
157redirecting sub detector stdout, stderr to sub detector log file
158
3d8bc902 159Revision 1.35 2007/04/04 16:26:38 acolla
1601. Re-organization of function calls in TestPreprocessor to make it more meaningful.
1612. Added missing dependency in test preprocessors.
1623. in AliShuttle.cxx: processing time and memory consumption info on a single line.
163
886d60e6 164Revision 1.34 2007/04/04 10:33:36 jgrosseo
1651) Storing of files to the Grid is now done _after_ your preprocessors succeeded. This is transparent, which means that you can still use the same functions (Store, StoreReferenceData) to store files to the Grid. However, the Shuttle first stores them locally and transfers them after the preprocessor finished. The return code of these two functions has changed from UInt_t to Bool_t which gives you the success of the storing.
166In case of an error with the Grid, the Shuttle will retry the storing later, the preprocessor does not need to be run again.
167
1682) The meaning of the return code of the preprocessor has changed. 0 is now success and any other value means failure. This value is stored in the log and you can use it to keep details about the error condition.
169
1703) New function StoreReferenceFile to _directly_ store a file (without opening it) to the reference storage.
171
1724) The memory usage of the preprocessor is monitored. If it exceeds 2 GB it is terminated.
173
1745) New function AliPreprocessor::ProcessDCS(). If you do not need to have DCS data in all cases, you can skip the processing by implemting this function and returning kFALSE under certain conditions. E.g. if there is a certain run type.
175If you always need DCS data (like before), you do not need to implement it.
176
1776) The run type has been added to the monitoring page
178
9827400b 179Revision 1.33 2007/04/03 13:56:01 acolla
180Grid Storage at the end of preprocessing. Added virtual method to disable DCS query according to the
181run type.
182
3301427a 183Revision 1.32 2007/02/28 10:41:56 acolla
184Run type field added in SHUTTLE framework. Run type is read from "run type" logbook and retrieved by
185AliPreprocessor::GetRunType() function.
186Added some ldap definition files.
187
d386d623 188Revision 1.30 2007/02/13 11:23:21 acolla
189Moved getters and setters of Shuttle's main OCDB/Reference, local
190OCDB/Reference, temp and log folders to AliShuttleInterface
191
9d733021 192Revision 1.27 2007/01/30 17:52:42 jgrosseo
193adding monalisa monitoring
194
e7f62f16 195Revision 1.26 2007/01/23 19:20:03 acolla
196Removed old ldif files, added TOF, MCH ldif files. Added some options in
197AliShuttleConfig::Print. Added in Ali Shuttle: SetShuttleTempDir and
198SetShuttleLogDir
199
36c99a6a 200Revision 1.25 2007/01/15 19:13:52 acolla
201Moved some AliInfo to AliDebug in SendMail function
202
fc5a4708 203Revision 1.21 2006/12/07 08:51:26 jgrosseo
204update (alberto):
205table, db names in ldap configuration
206added GRP preprocessor
207DCS data can also be retrieved by data point
208
2c15234c 209Revision 1.20 2006/11/16 16:16:48 jgrosseo
210introducing strict run ordering flag
211removed giving preprocessor name to preprocessor, they have to know their name themselves ;-)
212
be48e3ea 213Revision 1.19 2006/11/06 14:23:04 jgrosseo
214major update (Alberto)
215o) reading of run parameters from the logbook
216o) online offline naming conversion
217o) standalone DCSclient package
218
eba76848 219Revision 1.18 2006/10/20 15:22:59 jgrosseo
220o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
221o) Merging Collect, CollectAll, CollectNew function
222o) Removing implementation of empty copy constructors (declaration still there!)
223
cb343cfd 224Revision 1.17 2006/10/05 16:20:55 jgrosseo
225adapting to new CDB classes
226
6ec0e06c 227Revision 1.16 2006/10/05 15:46:26 jgrosseo
228applying to the new interface
229
481441a2 230Revision 1.15 2006/10/02 16:38:39 jgrosseo
231update (alberto):
232fixed memory leaks
233storing of objects that failed to be stored to the grid before
234interfacing of shuttle status table in daq system
235
2bb7b766 236Revision 1.14 2006/08/29 09:16:05 jgrosseo
237small update
238
85a80aa9 239Revision 1.13 2006/08/15 10:50:00 jgrosseo
240effc++ corrections (alberto)
241
4f0ab988 242Revision 1.12 2006/08/08 14:19:29 jgrosseo
243Update to shuttle classes (Alberto)
244
245- Possibility to set the full object's path in the Preprocessor's and
246Shuttle's Store functions
247- Possibility to extend the object's run validity in the same classes
248("startValidity" and "validityInfinite" parameters)
249- Implementation of the StoreReferenceData function to store reference
250data in a dedicated CDB storage.
251
84090f85 252Revision 1.11 2006/07/21 07:37:20 jgrosseo
253last run is stored after each run
254
7bfb2090 255Revision 1.10 2006/07/20 09:54:40 jgrosseo
256introducing status management: The processing per subdetector is divided into several steps,
257after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
258can keep track of the number of failures and skips further processing after a certain threshold is
259exceeded. These thresholds can be configured in LDAP.
260
5164a766 261Revision 1.9 2006/07/19 10:09:55 jgrosseo
262new configuration, accesst to DAQ FES (Alberto)
263
57f50b3c 264Revision 1.8 2006/07/11 12:44:36 jgrosseo
265adding parameters for extended validity range of data produced by preprocessor
266
17111222 267Revision 1.7 2006/07/10 14:37:09 jgrosseo
268small fix + todo comment
269
e090413b 270Revision 1.6 2006/07/10 13:01:41 jgrosseo
271enhanced storing of last sucessfully processed run (alberto)
272
a7160fe9 273Revision 1.5 2006/07/04 14:59:57 jgrosseo
274revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
275
45a493ce 276Revision 1.4 2006/06/12 09:11:16 jgrosseo
277coding conventions (Alberto)
278
58bc3020 279Revision 1.3 2006/06/06 14:26:40 jgrosseo
280o) removed files that were moved to STEER
281o) shuttle updated to follow the new interface (Alberto)
282
b948db8d 283Revision 1.2 2006/03/07 07:52:34 hristov
284New version (B.Yordanov)
285
d477ad88 286Revision 1.6 2005/11/19 17:19:14 byordano
287RetrieveDATEEntries and RetrieveConditionsData added
288
289Revision 1.5 2005/11/19 11:09:27 byordano
290AliShuttle declaration added
291
292Revision 1.4 2005/11/17 17:47:34 byordano
293TList changed to TObjArray
294
295Revision 1.3 2005/11/17 14:43:23 byordano
296import to local CVS
297
298Revision 1.1.1.1 2005/10/28 07:33:58 hristov
299Initial import as subdirectory in AliRoot
300
73abe331 301Revision 1.2 2005/09/13 08:41:15 byordano
302default startTime endTime added
303
304Revision 1.4 2005/08/30 09:13:02 byordano
305some docs added
306
307Revision 1.3 2005/08/29 21:15:47 byordano
308some docs added
309
310*/
311
//
// This class is the main manager for AliShuttle.
// It organizes the data retrieval from DCS and calls the
// interface methods of AliPreprocessor.
// For every detector in AliShuttleConfig (see AliShuttleConfig),
// data for its set of aliases is retrieved. If there is a registered
// AliPreprocessor for this detector then it will be used
// according to the schema (see AliPreprocessor).
// If there isn't a registered AliPreprocessor then the retrieved
// data is stored automatically to the underlying AliCDBStorage.
// The alias name is used for detSpec.
//
324
325#include "AliShuttle.h"
326
327#include "AliCDBManager.h"
328#include "AliCDBStorage.h"
329#include "AliCDBId.h"
84090f85 330#include "AliCDBRunRange.h"
331#include "AliCDBPath.h"
5164a766 332#include "AliCDBEntry.h"
73abe331 333#include "AliShuttleConfig.h"
eba76848 334#include "DCSClient/AliDCSClient.h"
73abe331 335#include "AliLog.h"
b948db8d 336#include "AliPreprocessor.h"
5164a766 337#include "AliShuttleStatus.h"
2bb7b766 338#include "AliShuttleLogbookEntry.h"
73abe331 339
57f50b3c 340#include <TSystem.h>
58bc3020 341#include <TObject.h>
b948db8d 342#include <TString.h>
57f50b3c 343#include <TTimeStamp.h>
73abe331 344#include <TObjString.h>
57f50b3c 345#include <TSQLServer.h>
346#include <TSQLResult.h>
347#include <TSQLRow.h>
cb343cfd 348#include <TMutex.h>
9827400b 349#include <TSystemDirectory.h>
350#include <TSystemFile.h>
a986b218 351#include <TFile.h>
9827400b 352#include <TGrid.h>
353#include <TGridResult.h>
73abe331 354
e7f62f16 355#include <TMonaLisaWriter.h>
356
5164a766 357#include <fstream>
358
cb343cfd 359#include <sys/types.h>
360#include <sys/wait.h>
361
73abe331 362ClassImp(AliShuttle)
363
b948db8d 364//______________________________________________________________________________________________
365AliShuttle::AliShuttle(const AliShuttleConfig* config,
366 UInt_t timeout, Int_t retries):
4f0ab988 367fConfig(config),
368fTimeout(timeout), fRetries(retries),
369fPreprocessorMap(),
2bb7b766 370fLogbookEntry(0),
eba76848 371fCurrentDetector(),
85a80aa9 372fStatusEntry(0),
cb343cfd 373fMonitoringMutex(0),
eba76848 374fLastActionTime(0),
e7f62f16 375fLastAction(),
9827400b 376fMonaLisa(0),
377fTestMode(kNone),
ffa29e93 378fReadTestMode(kFALSE),
379fOutputRedirected(kFALSE)
73abe331 380{
381 //
382 // config: AliShuttleConfig used
73abe331 383 // timeout: timeout used for AliDCSClient connection
384 // retries: the number of retries in case of connection error.
385 //
386
57f50b3c 387 if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
be48e3ea 388 for(int iSys=0;iSys<4;iSys++) {
57f50b3c 389 fServer[iSys]=0;
be48e3ea 390 if (iSys < 3)
2c15234c 391 fFXSlist[iSys].SetOwner(kTRUE);
57f50b3c 392 }
2bb7b766 393 fPreprocessorMap.SetOwner(kTRUE);
be48e3ea 394
395 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
396 fFirstUnprocessed[iDet] = kFALSE;
397
cb343cfd 398 fMonitoringMutex = new TMutex();
58bc3020 399}
400
b948db8d 401//______________________________________________________________________________________________
57f50b3c 402AliShuttle::~AliShuttle()
58bc3020 403{
9827400b 404 //
405 // destructor
406 //
58bc3020 407
b948db8d 408 fPreprocessorMap.DeleteAll();
be48e3ea 409 for(int iSys=0;iSys<4;iSys++)
57f50b3c 410 if(fServer[iSys]) {
411 fServer[iSys]->Close();
412 delete fServer[iSys];
eba76848 413 fServer[iSys] = 0;
57f50b3c 414 }
2bb7b766 415
416 if (fStatusEntry){
417 delete fStatusEntry;
418 fStatusEntry = 0;
419 }
cb343cfd 420
421 if (fMonitoringMutex)
422 {
423 delete fMonitoringMutex;
424 fMonitoringMutex = 0;
425 }
73abe331 426}
427
b948db8d 428//______________________________________________________________________________________________
57f50b3c 429void AliShuttle::RegisterPreprocessor(AliPreprocessor* preprocessor)
58bc3020 430{
73abe331 431 //
b948db8d 432 // Registers new AliPreprocessor.
73abe331 433 // It uses GetName() for indentificator of the pre processor.
434 // The pre processor is registered it there isn't any other
435 // with the same identificator (GetName()).
436 //
437
eba76848 438 const char* detName = preprocessor->GetName();
439 if(GetDetPos(detName) < 0)
440 AliFatal(Form("********** !!!!! Invalid detector name: %s !!!!! **********", detName));
441
442 if (fPreprocessorMap.GetValue(detName)) {
443 AliWarning(Form("AliPreprocessor %s is already registered!", detName));
73abe331 444 return;
445 }
446
eba76848 447 fPreprocessorMap.Add(new TObjString(detName), preprocessor);
73abe331 448}
b948db8d 449//______________________________________________________________________________________________
3301427a 450Bool_t AliShuttle::Store(const AliCDBPath& path, TObject* object,
84090f85 451 AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite)
73abe331 452{
9827400b 453 // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for
454 // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored
455 // using this function. Use StoreReferenceData instead!
456 // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
457 // finishes the data are transferred to the main storage (Grid).
b948db8d 458
3301427a 459 return StoreLocally(fgkLocalCDB, path, object, metaData, validityStart, validityInfinite);
84090f85 460}
461
462//______________________________________________________________________________________________
3301427a 463Bool_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData)
84090f85 464{
9827400b 465 // Stores a CDB object in the storage for reference data. This objects will not be available during
466 // offline reconstrunction. Use this function for reference data only!
467 // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
468 // finishes the data are transferred to the main storage (Grid).
85a80aa9 469
3301427a 470 return StoreLocally(fgkLocalRefStorage, path, object, metaData);
85a80aa9 471}
472
473//______________________________________________________________________________________________
3301427a 474Bool_t AliShuttle::StoreLocally(const TString& localUri,
85a80aa9 475 const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
476 Int_t validityStart, Bool_t validityInfinite)
477{
9827400b 478 // Store object temporarily in local storage. Parameters are passed by Store and StoreReferenceData functions.
479 // when the preprocessor finishes the data are transferred to the main storage (Grid).
480 // The parameters are:
481 // 1) Uri of the backup storage (Local)
482 // 2) the object's path.
483 // 3) the object to be stored
484 // 4) the metaData to be associated with the object
485 // 5) the validity start run number w.r.t. the current run,
486 // if the data is valid only for this run leave the default 0
487 // 6) specifies if the calibration data is valid for infinity (this means until updated),
488 // typical for calibration runs, the default is kFALSE
489 //
490 // returns 0 if fail, 1 otherwise
84090f85 491
9827400b 492 if (fTestMode & kErrorStorage)
493 {
494 Log(fCurrentDetector, "StoreLocally - In TESTMODE - Simulating error while storing locally");
495 return kFALSE;
496 }
497
3301427a 498 const char* cdbType = (localUri == fgkLocalCDB) ? "CDB" : "Reference";
2bb7b766 499
85a80aa9 500 Int_t firstRun = GetCurrentRun() - validityStart;
84090f85 501 if(firstRun < 0) {
9827400b 502 AliWarning("First valid run happens to be less than 0! Setting it to 0.");
84090f85 503 firstRun=0;
504 }
505
506 Int_t lastRun = -1;
507 if(validityInfinite) {
508 lastRun = AliCDBRunRange::Infinity();
509 } else {
510 lastRun = GetCurrentRun();
511 }
512
3301427a 513 // Version is set to current run, it will be used later to transfer data to Grid
514 AliCDBId id(path, firstRun, lastRun, GetCurrentRun(), -1);
2bb7b766 515
516 if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){
517 TObjString runUsed = Form("%d", GetCurrentRun());
9e080f92 518 metaData->SetProperty("RunUsed(TObjString)", runUsed.Clone());
2bb7b766 519 }
84090f85 520
3301427a 521 Bool_t result = kFALSE;
84090f85 522
3301427a 523 if (!(AliCDBManager::Instance()->GetStorage(localUri))) {
524 Log("SHUTTLE", Form("StoreLocally - Cannot activate local %s storage", cdbType));
84090f85 525 } else {
3301427a 526 result = AliCDBManager::Instance()->GetStorage(localUri)
84090f85 527 ->Put(object, id, metaData);
528 }
529
530 if(!result) {
531
9827400b 532 Log(fCurrentDetector, Form("StoreLocally - Can't store object <%s>!", id.ToString().Data()));
3301427a 533 }
2bb7b766 534
3301427a 535 return result;
536}
84090f85 537
3301427a 538//______________________________________________________________________________________________
539Bool_t AliShuttle::StoreOCDB()
540{
9827400b 541 //
542 // Called when preprocessor ends successfully or when previous storage attempt failed (kStoreError status)
543 // Calls underlying StoreOCDB(const char*) function twice, for OCDB and Reference storage.
544 // Then calls StoreRefFilesToGrid to store reference files.
545 //
546
547 if (fTestMode & kErrorGrid)
548 {
549 Log("SHUTTLE", "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
550 Log(fCurrentDetector, "StoreOCDB - In TESTMODE - Simulating error while storing in the Grid");
551 return kFALSE;
552 }
553
c88ad5db 554 Log("SHUTTLE","StoreOCDB - Storing OCDB data ...");
86aa42c3 555 Bool_t resultCDB = StoreOCDB(fgkMainCDB);
556
c88ad5db 557 Log("SHUTTLE","StoreOCDB - Storing reference data ...");
3301427a 558 Bool_t resultRef = StoreOCDB(fgkMainRefStorage);
9827400b 559
c88ad5db 560 Log("SHUTTLE","StoreOCDB - Storing reference files ...");
561 Bool_t resultRefFiles = CopyFilesToGrid("reference");
562
563 Bool_t resultMetadata = kTRUE;
564 if(fCurrentDetector == "GRP")
565 {
566 Log("StoreOCDB - SHUTTLE","Storing Run Metadata file ...");
567 resultMetadata = CopyFilesToGrid("metadata");
568 }
9827400b 569
c88ad5db 570 return resultCDB && resultRef && resultRefFiles && resultMetadata;
3301427a 571}
572
573//______________________________________________________________________________________________
574Bool_t AliShuttle::StoreOCDB(const TString& gridURI)
575{
576 //
577 // Called by StoreOCDB(), performs actual storage to the main OCDB and reference storages (Grid)
578 //
579
580 TObjArray* gridIds=0;
581
582 Bool_t result = kTRUE;
583
584 const char* type = 0;
585 TString localURI;
586 if(gridURI == fgkMainCDB) {
587 type = "OCDB";
588 localURI = fgkLocalCDB;
589 } else if(gridURI == fgkMainRefStorage) {
590 type = "reference";
591 localURI = fgkLocalRefStorage;
592 } else {
593 AliError(Form("Invalid storage URI: %s", gridURI.Data()));
594 return kFALSE;
595 }
596
597 AliCDBManager* man = AliCDBManager::Instance();
598
599 AliCDBStorage *gridSto = man->GetStorage(gridURI);
600 if(!gridSto) {
601 Log("SHUTTLE",
602 Form("StoreOCDB - cannot activate main %s storage", type));
603 return kFALSE;
604 }
605
606 gridIds = gridSto->GetQueryCDBList();
607
608 // get objects previously stored in local CDB
609 AliCDBStorage *localSto = man->GetStorage(localURI);
610 if(!localSto) {
611 Log("SHUTTLE",
612 Form("StoreOCDB - cannot activate local %s storage", type));
613 return kFALSE;
614 }
615 AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*");
616 // Local objects were stored with current run as Grid version!
617 TList* localEntries = localSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun());
618 localEntries->SetOwner(1);
619
620 // loop on local stored objects
621 TIter localIter(localEntries);
622 AliCDBEntry *aLocEntry = 0;
623 while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){
624 aLocEntry->SetOwner(1);
625 AliCDBId aLocId = aLocEntry->GetId();
626 aLocEntry->SetVersion(-1);
627 aLocEntry->SetSubVersion(-1);
628
629 // If local object is valid up to infinity we store it only if it is
630 // the first unprocessed run!
631 if (aLocId.GetLastRun() == AliCDBRunRange::Infinity() &&
632 !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
633 {
634 Log("SHUTTLE", Form("StoreOCDB - %s: object %s has validity infinite but "
635 "there are previous unprocessed runs!",
636 fCurrentDetector.Data(), aLocId.GetPath().Data()));
637 continue;
638 }
639
640 // loop on Grid valid Id's
641 Bool_t store = kTRUE;
642 TIter gridIter(gridIds);
643 AliCDBId* aGridId = 0;
644 while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
645 if(aGridId->GetPath() != aLocId.GetPath()) continue;
646 // skip all objects valid up to infinity
647 if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
648 // if we get here, it means there's already some more recent object stored on Grid!
649 store = kFALSE;
650 break;
651 }
652
653 // If we get here, the file can be stored!
654 Bool_t storeOk = gridSto->Put(aLocEntry);
655 if(!store || storeOk){
656
657 if (!store)
658 {
659 Log(fCurrentDetector.Data(),
660 Form("StoreOCDB - A more recent object already exists in %s storage: <%s>",
661 type, aGridId->ToString().Data()));
662 } else {
663 Log("SHUTTLE",
664 Form("StoreOCDB - Object <%s> successfully put into %s storage",
665 aLocId.ToString().Data(), type));
2d9019b4 666 Log(fCurrentDetector.Data(),
667 Form("StoreOCDB - Object <%s> successfully put into %s storage",
668 aLocId.ToString().Data(), type));
3301427a 669 }
84090f85 670
3301427a 671 // removing local filename...
672 TString filename;
673 localSto->IdToFilename(aLocId, filename);
c88ad5db 674 Log("SHUTTLE", Form("StoreOCDB - Removing local file %s", filename.Data()));
3301427a 675 RemoveFile(filename.Data());
676 continue;
677 } else {
678 Log("SHUTTLE",
679 Form("StoreOCDB - Grid %s storage of object <%s> failed",
680 type, aLocId.ToString().Data()));
2d9019b4 681 Log(fCurrentDetector.Data(),
682 Form("StoreOCDB - Grid %s storage of object <%s> failed",
683 type, aLocId.ToString().Data()));
3301427a 684 result = kFALSE;
b948db8d 685 }
686 }
3301427a 687 localEntries->Clear();
2bb7b766 688
b948db8d 689 return result;
3301427a 690}
691
692//______________________________________________________________________________________________
546242fb 693Bool_t AliShuttle::CleanReferenceStorage(const char* detector)
694{
2d9019b4 695 // clears the directory used to store reference files of a given subdetector
546242fb 696
697 AliCDBManager* man = AliCDBManager::Instance();
698 AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
2d9019b4 699 TString localBaseFolder = sto->GetBaseFolder();
700
701 TString targetDir = GetRefFilePrefix(localBaseFolder.Data(), detector);
702
d524ade6 703 Log("SHUTTLE", Form("CleanReferenceStorage - Cleaning %s", targetDir.Data()));
2d9019b4 704
705 TString begin;
706 begin.Form("%d_", GetCurrentRun());
707
708 TSystemDirectory* baseDir = new TSystemDirectory("/", targetDir);
709 if (!baseDir)
710 return kTRUE;
711
712 TList* dirList = baseDir->GetListOfFiles();
713 delete baseDir;
714
715 if (!dirList) return kTRUE;
716
717 if (dirList->GetEntries() < 3)
718 {
719 delete dirList;
720 return kTRUE;
721 }
722
723 Int_t nDirs = 0, nDel = 0;
724 TIter dirIter(dirList);
725 TSystemFile* entry = 0;
546242fb 726
2d9019b4 727 Bool_t success = kTRUE;
546242fb 728
2d9019b4 729 while ((entry = dynamic_cast<TSystemFile*> (dirIter.Next())))
730 {
731 if (entry->IsDirectory())
732 continue;
733
734 TString fileName(entry->GetName());
735 if (!fileName.BeginsWith(begin))
736 continue;
737
738 nDirs++;
739
740 // delete file
741 Int_t result = gSystem->Unlink(fileName.Data());
742
743 if (result)
744 {
d524ade6 745 Log("SHUTTLE", Form("CleanReferenceStorage - Could not delete file %s!", fileName.Data()));
2d9019b4 746 success = kFALSE;
747 } else {
748 nDel++;
749 }
750 }
751
752 if(nDirs > 0)
753 Log("SHUTTLE", Form("CleanReferenceStorage - %d (over %d) reference files in folder %s were deleted.",
754 nDel, nDirs, targetDir.Data()));
755
756
757 delete dirList;
758 return success;
759
760
761
762
763
546242fb 764
765 Int_t result = gSystem->GetPathInfo(targetDir, 0, (Long64_t*) 0, 0, 0);
766 if (result == 0)
767 {
768 // delete directory
d524ade6 769 result = gSystem->Exec(Form("rm -rf %s", targetDir.Data()));
546242fb 770 if (result != 0)
771 {
d524ade6 772 Log("SHUTTLE", Form("CleanReferenceStorage - Could not clean directory %s", targetDir.Data()));
546242fb 773 return kFALSE;
774 }
775 }
776
777 result = gSystem->mkdir(targetDir, kTRUE);
778 if (result != 0)
779 {
c88ad5db 780 Log("SHUTTLE", Form("CleanReferenceStorage - Error creating base directory %s", targetDir.Data()));
546242fb 781 return kFALSE;
782 }
783
784 return kTRUE;
785}
786
787//______________________________________________________________________________________________
9827400b 788Bool_t AliShuttle::StoreReferenceFile(const char* detector, const char* localFile, const char* gridFileName)
789{
790 //
3c2a21c8 791 // Stores reference file directly (without opening it). This function stores the file locally.
9827400b 792 //
3c2a21c8 793 // The file is stored under the following location:
794 // <base folder of local reference storage>/<DET>/<RUN#>_<gridFileName>
795 // where <gridFileName> is the second parameter given to the function
796 //
9827400b 797
798 if (fTestMode & kErrorStorage)
799 {
800 Log(fCurrentDetector, "StoreReferenceFile - In TESTMODE - Simulating error while storing locally");
801 return kFALSE;
802 }
803
804 AliCDBManager* man = AliCDBManager::Instance();
805 AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
806
807 TString localBaseFolder = sto->GetBaseFolder();
808
d524ade6 809 TString target = GetRefFilePrefix(localBaseFolder.Data(), detector);
810 target.Append(Form("/%d_%s", GetCurrentRun(), gridFileName));
9827400b 811
d524ade6 812 return CopyFileLocally(localFile, target);
c88ad5db 813}
814
815//______________________________________________________________________________________________
816Bool_t AliShuttle::StoreRunMetadataFile(const char* localFile, const char* gridFileName)
817{
818 //
819 // Stores Run metadata file to the Grid, in the run folder
820 //
821 // Only GRP can call this function.
822
823 if (fTestMode & kErrorStorage)
824 {
825 Log(fCurrentDetector, "StoreRunMetaDataFile - In TESTMODE - Simulating error while storing locally");
826 return kFALSE;
827 }
828
829 AliCDBManager* man = AliCDBManager::Instance();
830 AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
831
832 TString localBaseFolder = sto->GetBaseFolder();
833
834 // Build Run level folder
835 // folder = /alice/data/year/lhcPeriod/runNb/Raw
836
c88ad5db 837
675f64cd 838 TString lhcPeriod = GetLHCPeriod();
c88ad5db 839 if (lhcPeriod.Length() == 0)
840 {
841 Log("SHUTTLE","StoreRunMetaDataFile - LHCPeriod not found in logbook!");
842 return 0;
843 }
844
675f64cd 845 TString target = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/Raw/%s",
846 localBaseFolder.Data(), GetCurrentYear(),
d524ade6 847 lhcPeriod.Data(), GetCurrentRun(), gridFileName);
c88ad5db 848
d524ade6 849 return CopyFileLocally(localFile, target);
c88ad5db 850}
851
852//______________________________________________________________________________________________
d524ade6 853Bool_t AliShuttle::CopyFileLocally(const char* localFile, const TString& target)
c88ad5db 854{
855 //
856 // Stores file locally. Called by StoreReferenceFile and StoreRunMetadataFile
d524ade6 857 // Files are temporarily stored in the local reference storage. When the preprocessor
858 // finishes, the Shuttle calls CopyFilesToGrid to transfer the files to AliEn
859 // (in reference or run level folders)
c88ad5db 860 //
861
d524ade6 862 TString targetDir(target(0, target.Last('/')));
863
864 //try to open base dir folder, if it does not exist
2d9019b4 865 void* dir = gSystem->OpenDirectory(targetDir.Data());
866 if (dir == NULL) {
867 if (gSystem->mkdir(targetDir.Data(), kTRUE)) {
c88ad5db 868 Log("SHUTTLE", Form("StoreFileLocally - Can't open directory <%s>", targetDir.Data()));
2d9019b4 869 return kFALSE;
870 }
871
872 } else {
873 gSystem->FreeDirectory(dir);
874 }
9827400b 875
7d43a416 876 Int_t result = 0;
877
878 result = gSystem->GetPathInfo(localFile, 0, (Long64_t*) 0, 0, 0);
9827400b 879 if (result)
880 {
c88ad5db 881 Log("SHUTTLE", Form("StoreFileLocally - %s does not exist", localFile));
546242fb 882 return kFALSE;
9827400b 883 }
546242fb 884
7d43a416 885 result = gSystem->GetPathInfo(target, 0, (Long64_t*) 0, 0, 0);
886 if (!result)
887 {
888 Log("SHUTTLE", Form("StoreFileLocally - target file %s already exist, removing...", target.Data()));
889 if (gSystem->Unlink(target.Data()))
890 {
891 Log("SHUTTLE", Form("StoreFileLocally - Could not remove existing target file %s!", target.Data()));
892 return kFALSE;
893 }
894 }
895
9827400b 896 result = gSystem->CopyFile(localFile, target);
897
898 if (result == 0)
899 {
c88ad5db 900 Log("SHUTTLE", Form("StoreFileLocally - File %s stored locally to %s", localFile, target.Data()));
9827400b 901 return kTRUE;
902 }
903 else
904 {
d524ade6 905 Log("SHUTTLE", Form("StoreFileLocally - Could not store file %s to %s! Error code = %d",
546242fb 906 localFile, target.Data(), result));
9827400b 907 return kFALSE;
908 }
c88ad5db 909
910
911
9827400b 912}
913
914//______________________________________________________________________________________________
c88ad5db 915Bool_t AliShuttle::CopyFilesToGrid(const char* type)
9827400b 916{
917 //
c88ad5db 918 // Transfers local files to the Grid. Local files can be reference files
919 // or run metadata file (from GRP only).
9827400b 920 //
c88ad5db 921 // According to the type (ref, metadata) the files are stored under the following location:
922 // ref --> <base folder of reference storage>/<DET>/<RUN#>_<gridFileName>
923 // metadata --> <run data folder>/<MetadataFileName>
86aa42c3 924 //
c88ad5db 925
9827400b 926 AliCDBManager* man = AliCDBManager::Instance();
927 AliCDBStorage* sto = man->GetStorage(fgkLocalRefStorage);
928 if (!sto)
929 return kFALSE;
930 TString localBaseFolder = sto->GetBaseFolder();
9827400b 931
c88ad5db 932 TString dir;
933 TString alienDir;
9827400b 934 TString begin;
9827400b 935
c88ad5db 936 if (strcmp(type, "reference") == 0)
937 {
938 dir = GetRefFilePrefix(localBaseFolder.Data(), fCurrentDetector.Data());
939 AliCDBStorage* gridSto = man->GetStorage(fgkMainRefStorage);
940 if (!gridSto)
941 return kFALSE;
942 TString gridBaseFolder = gridSto->GetBaseFolder();
943 alienDir = GetRefFilePrefix(gridBaseFolder.Data(), fCurrentDetector.Data());
944 begin = Form("%d_", GetCurrentRun());
945 }
946 else if (strcmp(type, "metadata") == 0)
947 {
c88ad5db 948
675f64cd 949 TString lhcPeriod = GetLHCPeriod();
c88ad5db 950
951 if (lhcPeriod.Length() == 0)
952 {
953 Log("SHUTTLE","CopyFilesToGrid - LHCPeriod not found in logbook!");
954 return 0;
955 }
956
675f64cd 957 dir = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/Raw",
958 localBaseFolder.Data(), GetCurrentYear(),
c88ad5db 959 lhcPeriod.Data(), GetCurrentRun());
675f64cd 960 alienDir = dir(dir.Index("/alice/data/"), dir.Length());
961
c88ad5db 962 begin = "";
963 }
964 else
965 {
966 Log("SHUTTLE", "CopyFilesToGrid - Unexpected: type label must be reference or metadata!");
967 return kFALSE;
968 }
969
9827400b 970 TSystemDirectory* baseDir = new TSystemDirectory("/", dir);
3d8bc902 971 if (!baseDir)
972 return kTRUE;
973
2d9019b4 974 TList* dirList = baseDir->GetListOfFiles();
975 delete baseDir;
976
977 if (!dirList) return kTRUE;
978
979 if (dirList->GetEntries() < 3)
3d8bc902 980 {
2d9019b4 981 delete dirList;
9827400b 982 return kTRUE;
3d8bc902 983 }
2d9019b4 984
546242fb 985 if (!gGrid)
986 {
c88ad5db 987 Log("SHUTTLE", "CopyFilesToGrid - Connection to Grid failed: Cannot continue!");
2d9019b4 988 delete dirList;
546242fb 989 return kFALSE;
990 }
991
2d9019b4 992 Int_t nDirs = 0, nTransfer = 0;
993 TIter dirIter(dirList);
994 TSystemFile* entry = 0;
995
9827400b 996 Bool_t success = kTRUE;
3d8bc902 997 Bool_t first = kTRUE;
9827400b 998
2d9019b4 999 while ((entry = dynamic_cast<TSystemFile*> (dirIter.Next())))
1000 {
9827400b 1001 if (entry->IsDirectory())
1002 continue;
1003
1004 TString fileName(entry->GetName());
1005 if (!fileName.BeginsWith(begin))
1006 continue;
1007
2d9019b4 1008 nDirs++;
1009
3d8bc902 1010 if (first)
1011 {
1012 first = kFALSE;
c88ad5db 1013 // check that folder exists, otherwise create it
3d8bc902 1014 TGridResult* result = gGrid->Ls(alienDir.Data(), "a");
1015
1016 if (!result)
2d9019b4 1017 {
1018 delete dirList;
3d8bc902 1019 return kFALSE;
2d9019b4 1020 }
3d8bc902 1021
546242fb 1022 if (!result->GetFileName(1)) // TODO: It looks like element 0 is always 0!!
3d8bc902 1023 {
675f64cd 1024 // TODO It does not work currently! Bug in TAliEn::Mkdir
1025 // TODO Manually fixed in local root v5-16-00
c88ad5db 1026 if (!gGrid->Mkdir(alienDir.Data(),"-p",0))
3d8bc902 1027 {
c88ad5db 1028 Log("SHUTTLE", Form("CopyFilesToGrid - Cannot create directory %s",
3d8bc902 1029 alienDir.Data()));
2d9019b4 1030 delete dirList;
3d8bc902 1031 return kFALSE;
546242fb 1032 } else {
c88ad5db 1033 Log("SHUTTLE",Form("CopyFilesToGrid - Folder %s created", alienDir.Data()));
3d8bc902 1034 }
1035
546242fb 1036 } else {
c88ad5db 1037 Log("SHUTTLE",Form("CopyFilesToGrid - Folder %s found", alienDir.Data()));
3d8bc902 1038 }
1039 }
1040
9827400b 1041 TString fullLocalPath;
1042 fullLocalPath.Form("%s/%s", dir.Data(), fileName.Data());
1043
1044 TString fullGridPath;
1045 fullGridPath.Form("alien://%s/%s", alienDir.Data(), fileName.Data());
1046
a986b218 1047 Bool_t result = TFile::Cp(fullLocalPath, fullGridPath);
9827400b 1048
1049 if (result)
1050 {
c88ad5db 1051 Log("SHUTTLE", Form("CopyFilesToGrid - Copying local file %s to %s succeeded!",
1052 fullLocalPath.Data(), fullGridPath.Data()));
9827400b 1053 RemoveFile(fullLocalPath);
2d9019b4 1054 nTransfer++;
9827400b 1055 }
1056 else
1057 {
c88ad5db 1058 Log("SHUTTLE", Form("CopyFilesToGrid - Copying local file %s to %s FAILED!",
1059 fullLocalPath.Data(), fullGridPath.Data()));
9827400b 1060 success = kFALSE;
1061 }
1062 }
2d9019b4 1063
c88ad5db 1064 Log("SHUTTLE", Form("CopyFilesToGrid - %d (over %d) files in folder %s copied to Grid.",
1065 nTransfer, nDirs, dir.Data()));
2d9019b4 1066
1067
1068 delete dirList;
9827400b 1069 return success;
1070}
1071
1072//______________________________________________________________________________________________
2d9019b4 1073const char* AliShuttle::GetRefFilePrefix(const char* base, const char* detector)
1074{
1075 //
1076 // Get folder name of reference files
1077 //
1078
1079 TString offDetStr(GetOfflineDetName(detector));
1080 TString dir;
1081 if (offDetStr == "ITS" || offDetStr == "MUON" || offDetStr == "PHOS")
1082 {
1083 dir.Form("%s/%s/%s", base, offDetStr.Data(), detector);
1084 } else {
1085 dir.Form("%s/%s", base, offDetStr.Data());
1086 }
1087
1088 return dir.Data();
1089
1090
1091}
c88ad5db 1092
2d9019b4 1093//______________________________________________________________________________________________
3301427a 1094void AliShuttle::CleanLocalStorage(const TString& uri)
1095{
9827400b 1096 //
1097 // Called in case the preprocessor is declared failed. Remove remaining objects from the local storages.
1098 //
3301427a 1099
1100 const char* type = 0;
1101 if(uri == fgkLocalCDB) {
1102 type = "OCDB";
1103 } else if(uri == fgkLocalRefStorage) {
546242fb 1104 type = "Reference";
3301427a 1105 } else {
1106 AliError(Form("Invalid storage URI: %s", uri.Data()));
1107 return;
1108 }
1109
1110 AliCDBManager* man = AliCDBManager::Instance();
b948db8d 1111
3301427a 1112 // open local storage
1113 AliCDBStorage *localSto = man->GetStorage(uri);
1114 if(!localSto) {
1115 Log("SHUTTLE",
1116 Form("CleanLocalStorage - cannot activate local %s storage", type));
1117 return;
1118 }
1119
1120 TString filename(Form("%s/%s/*/Run*_v%d_s*.root",
546242fb 1121 localSto->GetBaseFolder().Data(), GetOfflineDetName(fCurrentDetector.Data()), GetCurrentRun()));
3301427a 1122
c88ad5db 1123 AliDebug(2, Form("filename = %s", filename.Data()));
3301427a 1124
c88ad5db 1125 Log("SHUTTLE", Form("Removing remaining local files for run %d and detector %s ...",
3301427a 1126 GetCurrentRun(), fCurrentDetector.Data()));
1127
1128 RemoveFile(filename.Data());
1129
1130}
1131
1132//______________________________________________________________________________________________
1133void AliShuttle::RemoveFile(const char* filename)
1134{
9827400b 1135 //
1136 // removes local file
1137 //
3301427a 1138
1139 TString command(Form("rm -f %s", filename));
1140
1141 Int_t result = gSystem->Exec(command.Data());
1142 if(result != 0)
1143 {
1144 Log("SHUTTLE", Form("RemoveFile - %s: Cannot remove file %s!",
1145 fCurrentDetector.Data(), filename));
1146 }
73abe331 1147}
1148
b948db8d 1149//______________________________________________________________________________________________
5164a766 1150AliShuttleStatus* AliShuttle::ReadShuttleStatus()
1151{
9827400b 1152 //
1153 // Reads the AliShuttleStatus from the CDB
1154 //
5164a766 1155
2bb7b766 1156 if (fStatusEntry){
1157 delete fStatusEntry;
1158 fStatusEntry = 0;
1159 }
5164a766 1160
10a5a932 1161 fStatusEntry = AliCDBManager::Instance()->GetStorage(GetLocalCDB())
2bb7b766 1162 ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
5164a766 1163
2bb7b766 1164 if (!fStatusEntry) return 0;
1165 fStatusEntry->SetOwner(1);
5164a766 1166
2bb7b766 1167 AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1168 if (!status) {
1169 AliError("Invalid object stored to CDB!");
1170 return 0;
1171 }
5164a766 1172
2bb7b766 1173 return status;
5164a766 1174}
1175
1176//______________________________________________________________________________________________
7bfb2090 1177Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status)
5164a766 1178{
9827400b 1179 //
1180 // writes the status for one subdetector
1181 //
2bb7b766 1182
1183 if (fStatusEntry){
1184 delete fStatusEntry;
1185 fStatusEntry = 0;
1186 }
5164a766 1187
2bb7b766 1188 Int_t run = GetCurrentRun();
5164a766 1189
2bb7b766 1190 AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
5164a766 1191
2bb7b766 1192 fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
1193 fStatusEntry->SetOwner(1);
5164a766 1194
2bb7b766 1195 UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
7bfb2090 1196
2bb7b766 1197 if (!result) {
3301427a 1198 Log("SHUTTLE", Form("WriteShuttleStatus - Failed for %s, run %d",
1199 fCurrentDetector.Data(), run));
2bb7b766 1200 return kFALSE;
1201 }
e7f62f16 1202
1203 SendMLInfo();
7bfb2090 1204
2bb7b766 1205 return kTRUE;
5164a766 1206}
1207
1208//______________________________________________________________________________________________
1209void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount)
1210{
9827400b 1211 //
1212 // changes the AliShuttleStatus for the given detector and run to the given status
1213 //
5164a766 1214
2bb7b766 1215 if (!fStatusEntry){
1216 AliError("UNEXPECTED: fStatusEntry empty");
1217 return;
1218 }
5164a766 1219
2bb7b766 1220 AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
5164a766 1221
2bb7b766 1222 if (!status){
c88ad5db 1223 Log("SHUTTLE", "UpdateShuttleStatus - UNEXPECTED: status could not be read from current CDB entry");
2bb7b766 1224 return;
1225 }
5164a766 1226
2c15234c 1227 TString actionStr = Form("UpdateShuttleStatus - %s: Changing state from %s to %s",
eba76848 1228 fCurrentDetector.Data(),
36c99a6a 1229 status->GetStatusName(),
eba76848 1230 status->GetStatusName(newStatus));
cb343cfd 1231 Log("SHUTTLE", actionStr);
1232 SetLastAction(actionStr);
5164a766 1233
2bb7b766 1234 status->SetStatus(newStatus);
1235 if (increaseCount) status->IncreaseCount();
5164a766 1236
2bb7b766 1237 AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
e7f62f16 1238
1239 SendMLInfo();
5164a766 1240}
e7f62f16 1241
1242//______________________________________________________________________________________________
1243void AliShuttle::SendMLInfo()
1244{
1245 //
1246 // sends ML information about the current status of the current detector being processed
1247 //
1248
1249 AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
1250
1251 if (!status){
3301427a 1252 Log("SHUTTLE", "SendMLInfo - UNEXPECTED: status could not be read from current CDB entry");
e7f62f16 1253 return;
1254 }
1255
1256 TMonaLisaText mlStatus(Form("%s_status", fCurrentDetector.Data()), status->GetStatusName());
1257 TMonaLisaValue mlRetryCount(Form("%s_count", fCurrentDetector.Data()), status->GetCount());
1258
1259 TList mlList;
1260 mlList.Add(&mlStatus);
1261 mlList.Add(&mlRetryCount);
1262
1263 fMonaLisa->SendParameters(&mlList);
1264}
1265
5164a766 1266//______________________________________________________________________________________________
1267Bool_t AliShuttle::ContinueProcessing()
1268{
9827400b 1269 // this function reads the AliShuttleStatus information from CDB and
1270 // checks if the processing should be continued
1271 // if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
2bb7b766 1272
57c1a579 1273 if (!fConfig->HostProcessDetector(fCurrentDetector)) return kFALSE;
1274
1275 AliPreprocessor* aPreprocessor =
1276 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
1277 if (!aPreprocessor)
1278 {
c88ad5db 1279 Log("SHUTTLE", Form("ContinueProcessing - %s: no preprocessor registered", fCurrentDetector.Data()));
57c1a579 1280 return kFALSE;
1281 }
1282
2bb7b766 1283 AliShuttleLogbookEntry::Status entryStatus =
eba76848 1284 fLogbookEntry->GetDetectorStatus(fCurrentDetector);
2bb7b766 1285
1286 if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
c88ad5db 1287 Log("SHUTTLE", Form("ContinueProcessing - %s is %s",
2bb7b766 1288 fCurrentDetector.Data(),
1289 fLogbookEntry->GetDetectorStatusName(entryStatus)));
1290 return kFALSE;
1291 }
1292
1293 // if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state
be48e3ea 1294
1295 // check if current run is first unprocessed run for current detector
1296 if (fConfig->StrictRunOrder(fCurrentDetector) &&
1297 !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
1298 {
86aa42c3 1299 if (fTestMode == kNone)
1300 {
c88ad5db 1301 Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering"
1302 " but this is not the first unprocessed run!"));
86aa42c3 1303 return kFALSE;
1304 }
1305 else
1306 {
c88ad5db 1307 Log("SHUTTLE", Form("ContinueProcessing - In TESTMODE - "
1308 "Although %s requires strict run ordering "
1309 "and this is not the first unprocessed run, "
1310 "the SHUTTLE continues"));
86aa42c3 1311 }
be48e3ea 1312 }
1313
2bb7b766 1314 AliShuttleStatus* status = ReadShuttleStatus();
1315 if (!status) {
1316 // first time
1317 Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
1318 fCurrentDetector.Data()));
1319 status = new AliShuttleStatus(AliShuttleStatus::kStarted);
1320 return WriteShuttleStatus(status);
1321 }
1322
1323 // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
1324 // If it happens it may mean Logbook updating failed... let's do it now!
1325 if (status->GetStatus() == AliShuttleStatus::kDone ||
1326 status->GetStatus() == AliShuttleStatus::kFailed){
1327 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
1328 fCurrentDetector.Data(),
1329 status->GetStatusName(status->GetStatus())));
1330 UpdateShuttleLogbook(fCurrentDetector.Data(),
1331 status->GetStatusName(status->GetStatus()));
1332 return kFALSE;
1333 }
1334
3301427a 1335 if (status->GetStatus() == AliShuttleStatus::kStoreError) {
2bb7b766 1336 Log("SHUTTLE",
c88ad5db 1337 Form("ContinueProcessing - %s: Grid storage of one or more "
1338 "objects failed. Trying again now",
2bb7b766 1339 fCurrentDetector.Data()));
9827400b 1340 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
1341 if (StoreOCDB()){
c88ad5db 1342 Log("SHUTTLE", Form("ContinueProcessing - %s: all objects "
1343 "successfully stored into main storage",
3301427a 1344 fCurrentDetector.Data()));
2bb7b766 1345 } else {
1346 Log("SHUTTLE",
1347 Form("ContinueProcessing - %s: Grid storage failed again",
1348 fCurrentDetector.Data()));
9827400b 1349 UpdateShuttleStatus(AliShuttleStatus::kStoreError);
2bb7b766 1350 }
1351 return kFALSE;
1352 }
1353
1354 // if we get here, there is a restart
57c1a579 1355 Bool_t cont = kFALSE;
2bb7b766 1356
1357 // abort conditions
cb343cfd 1358 if (status->GetCount() >= fConfig->GetMaxRetries()) {
57c1a579 1359 Log("SHUTTLE", Form("ContinueProcessing - %s failed %d times in status %s - "
1360 "Updating Shuttle Logbook", fCurrentDetector.Data(),
2bb7b766 1361 status->GetCount(), status->GetStatusName()));
1362 UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
e7f62f16 1363 UpdateShuttleStatus(AliShuttleStatus::kFailed);
3301427a 1364
1365 // there may still be objects in local OCDB and reference storage
1366 // and FXS databases may be not updated: do it now!
9827400b 1367
1368 // TODO Currently disabled, we want to keep files in case of failure!
1369 // CleanLocalStorage(fgkLocalCDB);
1370 // CleanLocalStorage(fgkLocalRefStorage);
1371 // UpdateTableFailCase();
1372
1373 // Send mail to detector expert!
c88ad5db 1374 Log("SHUTTLE", Form("ContinueProcessing - Sending mail to %s expert...",
1375 fCurrentDetector.Data()));
9827400b 1376 if (!SendMail())
1377 Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert",
1378 fCurrentDetector.Data()));
3301427a 1379
57c1a579 1380 } else {
1381 Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. "
1382 "Aborted before with %s. Retry number %d.", fCurrentDetector.Data(),
1383 status->GetStatusName(), status->GetCount()));
9827400b 1384 Bool_t increaseCount = kTRUE;
c88ad5db 1385 if (status->GetStatus() == AliShuttleStatus::kDCSError ||
1386 status->GetStatus() == AliShuttleStatus::kDCSStarted)
1387 increaseCount = kFALSE;
675f64cd 1388
9827400b 1389 UpdateShuttleStatus(AliShuttleStatus::kStarted, increaseCount);
57c1a579 1390 cont = kTRUE;
2bb7b766 1391 }
1392
57c1a579 1393 return cont;
5164a766 1394}
1395
1396//______________________________________________________________________________________________
2bb7b766 1397Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
58bc3020 1398{
73abe331 1399 //
b948db8d 1400 // Makes data retrieval for all detectors in the configuration.
2bb7b766 1401 // entry: Shuttle logbook entry, contains run paramenters and status of detectors
1402 // (Unprocessed, Inactive, Failed or Done).
d477ad88 1403 // Returns kFALSE in case of error occured and kTRUE otherwise
73abe331 1404 //
1405
9827400b 1406 if (!entry) return kFALSE;
2bb7b766 1407
1408 fLogbookEntry = entry;
1409
c88ad5db 1410 Log("SHUTTLE", Form("\t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^*",
9827400b 1411 GetCurrentRun()));
2bb7b766 1412
e7f62f16 1413 // create ML instance that monitors this run
468df1ce 1414 fMonaLisa = new TMonaLisaWriter(fConfig->GetMonitorHost(), fConfig->GetMonitorTable(), Form("%d", GetCurrentRun()));
2bb7b766 1415
e7f62f16 1416 // Send the information to ML
1417 TMonaLisaText mlStatus("SHUTTLE_status", "Processing");
9827400b 1418 TMonaLisaText mlRunType("SHUTTLE_runtype", Form("%s (%s)", entry->GetRunType(), entry->GetRunParameter("log")));
e7f62f16 1419
1420 TList mlList;
1421 mlList.Add(&mlStatus);
9827400b 1422 mlList.Add(&mlRunType);
e7f62f16 1423
1424 fMonaLisa->SendParameters(&mlList);
3301427a 1425
9827400b 1426 if (fLogbookEntry->IsDone())
1427 {
1428 Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook");
1429 UpdateShuttleLogbook("shuttle_done");
1430 fLogbookEntry = 0;
1431 return kTRUE;
1432 }
1433
1434 // read test mode if flag is set
1435 if (fReadTestMode)
1436 {
3d8bc902 1437 fTestMode = kNone;
9827400b 1438 TString logEntry(entry->GetRunParameter("log"));
1439 //printf("log entry = %s\n", logEntry.Data());
1440 TString searchStr("Testmode: ");
1441 Int_t pos = logEntry.Index(searchStr.Data());
1442 //printf("%d\n", pos);
1443 if (pos >= 0)
1444 {
1445 TSubString subStr = logEntry(pos + searchStr.Length(), logEntry.Length());
1446 //printf("%s\n", subStr.String().Data());
1447 TString newStr(subStr.Data());
1448 TObjArray* token = newStr.Tokenize(' ');
1449 if (token)
1450 {
1451 //token->Print();
1452 TObjString* tmpStr = dynamic_cast<TObjString*> (token->First());
1453 if (tmpStr)
1454 {
1455 Int_t testMode = tmpStr->String().Atoi();
1456 if (testMode > 0)
1457 {
c88ad5db 1458 Log("SHUTTLE", Form("Process - Enabling test mode %d", testMode));
9827400b 1459 SetTestMode((TestMode) testMode);
1460 }
1461 }
1462 delete token;
1463 }
1464 }
1465 }
c88ad5db 1466
eba76848 1467 fLogbookEntry->Print("all");
57f50b3c 1468
1469 // Initialization
d477ad88 1470 Bool_t hasError = kFALSE;
5164a766 1471
675f64cd 1472 // Set the CDB and Reference folders according to the year and LHC period
1473 TString lhcPeriod(GetLHCPeriod());
1474 if (lhcPeriod.Length() == 0)
1475 {
7d4cf768 1476 Log("SHUTTLE","Process - LHCPeriod not found in logbook!");
1477 return 0;
675f64cd 1478 }
1479
1480 if (fgkMainCDB.Length() == 0)
1481 fgkMainCDB = Form("alien://folder=/alice/data/%d/%s/OCDB?user=alidaq?cacheFold=/tmp/OCDBCache",
1482 GetCurrentYear(), lhcPeriod.Data());
1483
1484 if (fgkMainRefStorage.Length() == 0)
1485 fgkMainRefStorage = Form("alien://folder=/alice/data/%d/%s/Reference?user=alidaq?cacheFold=/tmp/OCDBCache",
1486 GetCurrentYear(), lhcPeriod.Data());
1487
2bb7b766 1488 AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
1489 if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
1490 AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
1491 if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
d477ad88 1492
57f50b3c 1493 // Loop on detectors in the configuration
b948db8d 1494 TIter iter(fConfig->GetDetectors());
2bb7b766 1495 TObjString* aDetector = 0;
b948db8d 1496
be48e3ea 1497 while ((aDetector = (TObjString*) iter.Next()))
1498 {
7bfb2090 1499 fCurrentDetector = aDetector->String();
5164a766 1500
9e080f92 1501 if (ContinueProcessing() == kFALSE) continue;
1502
c88ad5db 1503 Log("SHUTTLE", Form("\t\t\t****** run %d - %s: START ******",
2bb7b766 1504 GetCurrentRun(), aDetector->GetName()));
1505
9d733021 1506 for(Int_t iSys=0;iSys<3;iSys++) fFXSCalled[iSys]=kFALSE;
1507
c88ad5db 1508 Log(fCurrentDetector.Data(), "Process - Starting processing");
85a80aa9 1509
be48e3ea 1510 Int_t pid = fork();
1511
1512 if (pid < 0)
1513 {
c88ad5db 1514 Log("SHUTTLE", "Process - ERROR: Forking failed");
be48e3ea 1515 }
1516 else if (pid > 0)
1517 {
1518 // parent
c88ad5db 1519 Log("SHUTTLE", Form("Process - In parent process of %d - %s: Starting monitoring",
be48e3ea 1520 GetCurrentRun(), aDetector->GetName()));
1521
1522 Long_t begin = time(0);
1523
1524 int status; // to be used with waitpid, on purpose an int (not Int_t)!
1525 while (waitpid(pid, &status, WNOHANG) == 0)
1526 {
1527 Long_t expiredTime = time(0) - begin;
1528
1529 if (expiredTime > fConfig->GetPPTimeOut())
1530 {
9827400b 1531 TString tmp;
c88ad5db 1532 tmp.Form("Process - Process of %s time out. "
1533 "Run time: %d seconds. Killing...",
1534 fCurrentDetector.Data(), expiredTime);
9827400b 1535 Log("SHUTTLE", tmp);
1536 Log(fCurrentDetector, tmp);
be48e3ea 1537
1538 kill(pid, 9);
1539
3301427a 1540 UpdateShuttleStatus(AliShuttleStatus::kPPTimeOut);
be48e3ea 1541 hasError = kTRUE;
1542
1543 gSystem->Sleep(1000);
1544 }
1545 else
1546 {
be48e3ea 1547 gSystem->Sleep(1000);
9827400b 1548
1549 TString checkStr;
1550 checkStr.Form("ps -o vsize --pid %d | tail -n 1", pid);
1551 FILE* pipe = gSystem->OpenPipe(checkStr, "r");
1552 if (!pipe)
1553 {
c88ad5db 1554 Log("SHUTTLE", Form("Process - Error: "
1555 "Could not open pipe to %s", checkStr.Data()));
9827400b 1556 continue;
1557 }
1558
1559 char buffer[100];
1560 if (!fgets(buffer, 100, pipe))
1561 {
c88ad5db 1562 Log("SHUTTLE", "Process - Error: ps did not return anything");
9827400b 1563 gSystem->ClosePipe(pipe);
1564 continue;
1565 }
1566 gSystem->ClosePipe(pipe);
1567
1568 //Log("SHUTTLE", Form("ps returned %s", buffer));
1569
1570 Int_t mem = 0;
1571 if ((sscanf(buffer, "%d\n", &mem) != 1) || !mem)
1572 {
c88ad5db 1573 Log("SHUTTLE", "Process - Error: Could not parse output of ps");
9827400b 1574 continue;
1575 }
1576
1577 if (expiredTime % 60 == 0)
c88ad5db 1578 Log("SHUTTLE", Form("Process - %s: Checking process. "
1579 "Run time: %d seconds - Memory consumption: %d KB",
1580 fCurrentDetector.Data(), expiredTime, mem));
9827400b 1581
1582 if (mem > fConfig->GetPPMaxMem())
1583 {
1584 TString tmp;
c88ad5db 1585 tmp.Form("Process - Process exceeds maximum allowed memory "
1586 "(%d KB > %d KB). Killing...",
9827400b 1587 mem, fConfig->GetPPMaxMem());
1588 Log("SHUTTLE", tmp);
1589 Log(fCurrentDetector, tmp);
1590
1591 kill(pid, 9);
1592
1593 UpdateShuttleStatus(AliShuttleStatus::kPPOutOfMemory);
1594 hasError = kTRUE;
1595
1596 gSystem->Sleep(1000);
1597 }
be48e3ea 1598 }
1599 }
1600
c88ad5db 1601 Log("SHUTTLE", Form("Process - In parent process of %d - %s: Client has terminated.",
be48e3ea 1602 GetCurrentRun(), aDetector->GetName()));
1603
1604 if (WIFEXITED(status))
1605 {
1606 Int_t returnCode = WEXITSTATUS(status);
1607
c88ad5db 1608 Log("SHUTTLE", Form("Process - %s: the return code is %d", fCurrentDetector.Data(),
3301427a 1609 returnCode));
be48e3ea 1610
9827400b 1611 if (returnCode == 0) hasError = kTRUE;
be48e3ea 1612 }
1613 }
1614 else if (pid == 0)
1615 {
1616 // client
c88ad5db 1617 Log("SHUTTLE", Form("Process - In client process of %d - %s", GetCurrentRun(),
1618 aDetector->GetName()));
be48e3ea 1619
c88ad5db 1620 Log("SHUTTLE", Form("Process - Redirecting output to %s log",fCurrentDetector.Data()));
ffa29e93 1621
546242fb 1622 if ((freopen(GetLogFileName(fCurrentDetector), "a", stdout)) == 0)
ffa29e93 1623 {
c88ad5db 1624 Log("SHUTTLE", "Process - Could not freopen stdout");
ffa29e93 1625 }
1626 else
1627 {
1628 fOutputRedirected = kTRUE;
1629 if ((dup2(fileno(stdout), fileno(stderr))) < 0)
c88ad5db 1630 Log("SHUTTLE", "Process - Could not redirect stderr");
ffa29e93 1631
1632 }
1633
5bac2bde 1634 TString wd = gSystem->WorkingDirectory();
675f64cd 1635 TString tmpDir = Form("%s/%s_%d_process", GetShuttleTempDir(),
1636 fCurrentDetector.Data(), GetCurrentRun());
5bac2bde 1637
d524ade6 1638 Int_t result = gSystem->GetPathInfo(tmpDir.Data(), 0, (Long64_t*) 0, 0, 0);
1639 if (!result) // temp dir already exists!
1640 {
1641 Log(fCurrentDetector.Data(),
1642 Form("Process - %s dir already exists! Removing...", tmpDir.Data()));
1643 gSystem->Exec(Form("rm -rf %s",tmpDir.Data()));
675f64cd 1644 }
1645
1646 if (gSystem->mkdir(tmpDir.Data(), 1))
1647 {
1648 Log(fCurrentDetector.Data(), "Process - could not make temp directory!!");
1649 gSystem->Exit(1);
d524ade6 1650 }
1651
1652 if (!gSystem->ChangeDirectory(tmpDir.Data()))
1653 {
1654 Log(fCurrentDetector.Data(), "Process - could not change directory!!");
1655 gSystem->Exit(1);
1656 }
5bac2bde 1657
9827400b 1658 Bool_t success = ProcessCurrentDetector();
5bac2bde 1659
1660 gSystem->ChangeDirectory(wd.Data());
b1d18693 1661
9827400b 1662 if (success) // Preprocessor finished successfully!
1663 {
b1d18693 1664 // remove temporary folder
1665 gSystem->Exec(Form("rm -rf %s",tmpDir.Data()));
1666
3301427a 1667 // Update time_processed field in FXS DB
1668 if (UpdateTable() == kFALSE)
5bac2bde 1669 Log("SHUTTLE", Form("Process - %s: Could not update FXS databases!",
1670 fCurrentDetector.Data()));
3301427a 1671
1672 // Transfer the data from local storage to main storage (Grid)
1673 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
1674 if (StoreOCDB() == kFALSE)
1675 {
c88ad5db 1676 Log("SHUTTLE",
1677 Form("\t\t\t****** run %d - %s: STORAGE ERROR ******",
3301427a 1678 GetCurrentRun(), aDetector->GetName()));
1679 UpdateShuttleStatus(AliShuttleStatus::kStoreError);
9827400b 1680 success = kFALSE;
3301427a 1681 } else {
c88ad5db 1682 Log("SHUTTLE",
1683 Form("\t\t\t****** run %d - %s: DONE ******",
3301427a 1684 GetCurrentRun(), aDetector->GetName()));
1685 UpdateShuttleStatus(AliShuttleStatus::kDone);
9827400b 1686 UpdateShuttleLogbook(fCurrentDetector, "DONE");
3301427a 1687 }
c88ad5db 1688 } else
1689 {
1690 Log("SHUTTLE",
1691 Form("\t\t\t****** run %d - %s: PP ERROR ******",
1692 GetCurrentRun(), aDetector->GetName()));
be48e3ea 1693 }
1694
4b95672b 1695 for (UInt_t iSys=0; iSys<3; iSys++)
1696 {
1697 if (fFXSCalled[iSys]) fFXSlist[iSys].Clear();
1698 }
1699
c88ad5db 1700 Log("SHUTTLE", Form("Process - Client process of %d - %s is exiting now with %d.",
9827400b 1701 GetCurrentRun(), aDetector->GetName(), success));
be48e3ea 1702
1703 // the client exits here
9827400b 1704 gSystem->Exit(success);
be48e3ea 1705
1706 AliError("We should never get here!!!");
1707 }
7bfb2090 1708 }
5164a766 1709
c88ad5db 1710 Log("SHUTTLE", Form("\t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^*",
2bb7b766 1711 GetCurrentRun()));
1712
1713 //check if shuttle is done for this run, if so update logbook
1714 TObjArray checkEntryArray;
1715 checkEntryArray.SetOwner(1);
9e080f92 1716 TString whereClause = Form("where run=%d", GetCurrentRun());
b0e53b15 1717 if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) ||
1718 checkEntryArray.GetEntries() == 0) {
9e080f92 1719 Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!",
1720 GetCurrentRun()));
1721 return hasError == kFALSE;
1722 }
b948db8d 1723
9e080f92 1724 AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*>
1725 (checkEntryArray.At(0));
2bb7b766 1726
9e080f92 1727 if (checkEntry)
1728 {
1729 if (checkEntry->IsDone())
be48e3ea 1730 {
9e080f92 1731 Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook");
1732 UpdateShuttleLogbook("shuttle_done");
1733 }
1734 else
1735 {
1736 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
be48e3ea 1737 {
9e080f92 1738 if (checkEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
be48e3ea 1739 {
9e080f92 1740 AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
1741 checkEntry->GetRun(), GetDetName(iDet)));
1742 fFirstUnprocessed[iDet] = kFALSE;
be48e3ea 1743 }
1744 }
2bb7b766 1745 }
1746 }
1747
e7f62f16 1748 // remove ML instance
1749 delete fMonaLisa;
1750 fMonaLisa = 0;
1751
2bb7b766 1752 fLogbookEntry = 0;
85a80aa9 1753
a7160fe9 1754 return hasError == kFALSE;
73abe331 1755}
1756
b948db8d 1757//______________________________________________________________________________________________
9827400b 1758Bool_t AliShuttle::ProcessCurrentDetector()
73abe331 1759{
1760 //
2bb7b766 1761 // Makes data retrieval just for a specific detector (fCurrentDetector).
73abe331 1762 // Threre should be a configuration for this detector.
73abe331 1763
1d172743 1764 Log("SHUTTLE", Form("ProcessCurrentDetector - Retrieving values for %s, run %d",
1765 fCurrentDetector.Data(), GetCurrentRun()));
73abe331 1766
d524ade6 1767 TString wd = gSystem->WorkingDirectory();
1768
2d9019b4 1769 if (!CleanReferenceStorage(fCurrentDetector.Data()))
546242fb 1770 return kFALSE;
d524ade6 1771
1772 gSystem->ChangeDirectory(wd.Data());
1773
1d172743 1774 TMap* dcsMap = new TMap();
3301427a 1775
1776 // call preprocessor
1777 AliPreprocessor* aPreprocessor =
1778 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
1779
1780 aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
1781
1782 Bool_t processDCS = aPreprocessor->ProcessDCS();
d477ad88 1783
651fdaab 1784 if (!processDCS)
1785 {
1d172743 1786 Log(fCurrentDetector, "ProcessCurrentDetector -"
1787 " The preprocessor requested to skip the retrieval of DCS values");
651fdaab 1788 }
8b739301 1789 else if (fTestMode & kSkipDCS)
2c15234c 1790 {
1d172743 1791 Log(fCurrentDetector, "ProcessCurrentDetector - In TESTMODE: Skipping DCS processing");
9827400b 1792 }
1793 else if (fTestMode & kErrorDCS)
1794 {
1d172743 1795 Log(fCurrentDetector, "ProcessCurrentDetector - In TESTMODE: Simulating DCS error");
3d8bc902 1796 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
9827400b 1797 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1d172743 1798 delete dcsMap;
9827400b 1799 return kFALSE;
2c15234c 1800 } else {
3301427a 1801
1802 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
1803
1d172743 1804 // Query DCS archive
1805 Int_t nServers = fConfig->GetNServers(fCurrentDetector);
a038aa70 1806
1d172743 1807 for (int iServ=0; iServ<nServers; iServ++)
2c15234c 1808 {
1d172743 1809
1810 TString host(fConfig->GetDCSHost(fCurrentDetector, iServ));
1811 Int_t port = fConfig->GetDCSPort(fCurrentDetector, iServ);
542b6cc8 1812 Int_t multiSplit = fConfig->GetMultiSplit(fCurrentDetector, iServ);
1813
1790d4b7 1814 Log(fCurrentDetector, Form("ProcessCurrentDetector -"
1815 " Querying DCS Amanda server %s:%d (%d of %d)",
1816 host.Data(), port, iServ+1, nServers));
1d172743 1817
1818 TMap* aliasMap = 0;
1819 TMap* dpMap = 0;
1820
1821 if (fConfig->GetDCSAliases(fCurrentDetector, iServ)->GetEntries() > 0)
2c15234c 1822 {
1d172743 1823 aliasMap = GetValueSet(host, port,
542b6cc8 1824 fConfig->GetDCSAliases(fCurrentDetector, iServ),
1825 kAlias, multiSplit);
1d172743 1826 if (!aliasMap)
1827 {
1828 Log(fCurrentDetector,
1829 Form("ProcessCurrentDetector -"
675f64cd 1830 " Error retrieving DCS aliases from server %s."
1831 " Sending mail to DCS experts!", host.Data()));
1d172743 1832 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
675f64cd 1833
1834 if (!SendMailToDCS())
1835 Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!"));
1836
a038aa70 1837 delete dcsMap;
1d172743 1838 return kFALSE;
1839 }
2c15234c 1840 }
a038aa70 1841
1d172743 1842 if (fConfig->GetDCSDataPoints(fCurrentDetector, iServ)->GetEntries() > 0)
a038aa70 1843 {
1d172743 1844 dpMap = GetValueSet(host, port,
542b6cc8 1845 fConfig->GetDCSDataPoints(fCurrentDetector, iServ),
1846 kDP, multiSplit);
1d172743 1847 if (!dpMap)
1848 {
1849 Log(fCurrentDetector,
1850 Form("ProcessCurrentDetector -"
675f64cd 1851 " Error retrieving DCS data points from server %s."
1852 " Sending mail to DCS experts!", host.Data()));
1d172743 1853 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
675f64cd 1854
1855 if (!SendMailToDCS())
1856 Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!"));
1857
1d172743 1858 if (aliasMap) delete aliasMap;
1859 delete dcsMap;
1860 return kFALSE;
1861 }
a038aa70 1862 }
1d172743 1863
1864 // merge aliasMap and dpMap into dcsMap
1865 if(aliasMap) {
1866 TIter iter(aliasMap);
a038aa70 1867 TObjString* key = 0;
1868 while ((key = (TObjString*) iter.Next()))
1d172743 1869 dcsMap->Add(key, aliasMap->GetValue(key->String()));
1870
1871 aliasMap->SetOwner(kFALSE);
1872 delete aliasMap;
1873 }
1874
1875 if(dpMap) {
1876 TIter iter(dpMap);
1877 TObjString* key = 0;
1878 while ((key = (TObjString*) iter.Next()))
1879 dcsMap->Add(key, dpMap->GetValue(key->String()));
1880
1881 dpMap->SetOwner(kFALSE);
1882 delete dpMap;
a038aa70 1883 }
73abe331 1884 }
1885 }
dc25836b 1886
b1d18693 1887 // save map into file, to help debugging in case of preprocessor error
1888 TFile* f = TFile::Open("DCSMap.root","recreate");
1889 f->cd();
1890 dcsMap->Write("DCSMap", TObject::kSingleKey);
1891 f->Close();
1892 delete f;
1893
2bb7b766 1894 // DCS Archive DB processing successful. Call Preprocessor!
85a80aa9 1895 UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
a7160fe9 1896
a038aa70 1897 UInt_t returnValue = aPreprocessor->Process(dcsMap);
b948db8d 1898
3301427a 1899 if (returnValue > 0) // Preprocessor error!
1900 {
c88ad5db 1901 Log(fCurrentDetector, Form("ProcessCurrentDetector - "
1902 "Preprocessor failed. Process returned %d.", returnValue));
cb343cfd 1903 UpdateShuttleStatus(AliShuttleStatus::kPPError);
a038aa70 1904 dcsMap->DeleteAll();
1905 delete dcsMap;
9827400b 1906 return kFALSE;
1907 }
1908
1909 // preprocessor ok!
1910 UpdateShuttleStatus(AliShuttleStatus::kPPDone);
1911 Log(fCurrentDetector, Form("ProcessCurrentDetector - %s preprocessor returned success",
1912 fCurrentDetector.Data()));
b948db8d 1913
a038aa70 1914 dcsMap->DeleteAll();
1915 delete dcsMap;
b948db8d 1916
9827400b 1917 return kTRUE;
2bb7b766 1918}
1919
1920//______________________________________________________________________________________________
1921Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
1922 TObjArray& entries)
1923{
9827400b 1924 // Query DAQ's Shuttle logbook and fills detector status object.
1925 // Call QueryRunParameters to query DAQ logbook for run parameters.
1926 //
2bb7b766 1927
fc5a4708 1928 entries.SetOwner(1);
1929
2bb7b766 1930 // check connection, in case connect
be48e3ea 1931 if(!Connect(3)) return kFALSE;
2bb7b766 1932
1933 TString sqlQuery;
441b0e9c 1934 sqlQuery = Form("select * from %s %s order by run", fConfig->GetShuttlelbTable(), whereClause);
2bb7b766 1935
be48e3ea 1936 TSQLResult* aResult = fServer[3]->Query(sqlQuery);
2bb7b766 1937 if (!aResult) {
1938 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
1939 return kFALSE;
1940 }
1941
fc5a4708 1942 AliDebug(2,Form("Query = %s", sqlQuery.Data()));
1943
2bb7b766 1944 if(aResult->GetRowCount() == 0) {
c88ad5db 1945 Log("SHUTTLE", "No entries in Shuttle Logbook match request");
9827400b 1946 delete aResult;
1947 return kTRUE;
2bb7b766 1948 }
1949
1950 // TODO Check field count!
db99d43e 1951 const UInt_t nCols = 23;
2bb7b766 1952 if (aResult->GetFieldCount() != (Int_t) nCols) {
c88ad5db 1953 Log("SHUTTLE", "Invalid SQL result field number!");
2bb7b766 1954 delete aResult;
1955 return kFALSE;
1956 }
1957
2bb7b766 1958 TSQLRow* aRow;
1959 while ((aRow = aResult->Next())) {
1960 TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
1961 Int_t run = runString.Atoi();
1962
eba76848 1963 AliShuttleLogbookEntry *entry = QueryRunParameters(run);
1964 if (!entry)
1965 continue;
2bb7b766 1966
1967 // loop on detectors
eba76848 1968 for(UInt_t ii = 0; ii < nCols; ii++)
1969 entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii));
2bb7b766 1970
eba76848 1971 entries.AddLast(entry);
2bb7b766 1972 delete aRow;
1973 }
1974
2bb7b766 1975 delete aResult;
1976 return kTRUE;
1977}
1978
1979//______________________________________________________________________________________________
eba76848 1980AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
2bb7b766 1981{
eba76848 1982 //
1983 // Retrieve run parameters written in the DAQ logbook and sets them into AliShuttleLogbookEntry object
1984 //
2bb7b766 1985
1986 // check connection, in case connect
be48e3ea 1987 if (!Connect(3))
eba76848 1988 return 0;
2bb7b766 1989
1990 TString sqlQuery;
2c15234c 1991 sqlQuery.Form("select * from %s where run=%d", fConfig->GetDAQlbTable(), run);
2bb7b766 1992
be48e3ea 1993 TSQLResult* aResult = fServer[3]->Query(sqlQuery);
2bb7b766 1994 if (!aResult) {
c88ad5db 1995 Log("SHUTTLE", Form("Can't execute query <%s>!", sqlQuery.Data()));
eba76848 1996 return 0;
2bb7b766 1997 }
1998
eba76848 1999 if (aResult->GetRowCount() == 0) {
2bb7b766 2000 Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run));
2001 delete aResult;
eba76848 2002 return 0;
2bb7b766 2003 }
2004
eba76848 2005 if (aResult->GetRowCount() > 1) {
c88ad5db 2006 Log("SHUTTLE", Form("QueryRunParameters - UNEXPECTED: "
2007 "more than one entry in DAQ Logbook for run %d!", run));
2bb7b766 2008 delete aResult;
eba76848 2009 return 0;
2bb7b766 2010 }
2011
eba76848 2012 TSQLRow* aRow = aResult->Next();
2013 if (!aRow)
2014 {
c88ad5db 2015 Log("SHUTTLE", Form("QueryRunParameters - Could not retrieve row for run %d. Skipping", run));
eba76848 2016 delete aResult;
2017 return 0;
2018 }
2bb7b766 2019
eba76848 2020 AliShuttleLogbookEntry* entry = new AliShuttleLogbookEntry(run);
2bb7b766 2021
eba76848 2022 for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
2023 entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
2bb7b766 2024
eba76848 2025 UInt_t startTime = entry->GetStartTime();
2026 UInt_t endTime = entry->GetEndTime();
2027
b0e53b15 2028 if (!startTime || !endTime || startTime > endTime)
2029 {
2030 Log("SHUTTLE",
2031 Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d. Skipping!",
2032 run, startTime, endTime));
2033
2034 Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
2035 fLogbookEntry = entry;
2036 if (!UpdateShuttleLogbook("shuttle_done"))
2037 {
2038 AliError(Form("Could not update logbook for run %d !", run));
2039 }
2040 fLogbookEntry = 0;
2041
2042 delete entry;
2043 delete aRow;
2044 delete aResult;
2045 return 0;
2046 }
2047
2048 TString totEventsStr = entry->GetRunParameter("totalEvents");
2049 Int_t totEvents = totEventsStr.Atoi();
2050 if (totEvents < 1)
2051 {
eba76848 2052 Log("SHUTTLE",
b0e53b15 2053 Form("QueryRunParameters - Run %d has 0 events - Skipping!", run));
2054
2055 Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
2056 fLogbookEntry = entry;
2057 if (!UpdateShuttleLogbook("shuttle_done"))
2058 {
2059 AliError(Form("Could not update logbook for run %d !", run));
2060 }
2061 fLogbookEntry = 0;
2062
eba76848 2063 delete entry;
2bb7b766 2064 delete aRow;
eba76848 2065 delete aResult;
2066 return 0;
2bb7b766 2067 }
2068
eba76848 2069 delete aRow;
2bb7b766 2070 delete aResult;
eba76848 2071
2072 return entry;
2bb7b766 2073}
2074
2075//______________________________________________________________________________________________
a038aa70 2076TMap* AliShuttle::GetValueSet(const char* host, Int_t port, const TSeqCollection* entries,
542b6cc8 2077 DCSType type, Int_t multiSplit)
a038aa70 2078{
2079 // Retrieve all "entry" data points from the DCS server
2080 // host, port: TSocket connection parameters
2081 // entries: list of name of the alias or data point
2082 // type: kAlias or kDP
2083 // returns TMap of values, 0 when failure
542b6cc8 2084
2085 AliDCSClient client(host, port, fTimeout, fRetries, multiSplit);
b41b252a 2086
a038aa70 2087 TMap* result = 0;
b41b252a 2088 if (type == kAlias)
a038aa70 2089 {
b41b252a 2090 result = client.GetAliasValues(entries, GetCurrentStartTime(),
2091 GetCurrentEndTime());
2092 }
2093 else if (type == kDP)
2094 {
2095 result = client.GetDPValues(entries, GetCurrentStartTime(),
2096 GetCurrentEndTime());
2097 }
a038aa70 2098
b41b252a 2099 if (result == 0)
2100 {
2101 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get entries! Reason: %s",
1790d4b7 2102 client.GetErrorString(client.GetResultErrorCode())));
2103 if (client.GetResultErrorCode() == AliDCSClient::fgkServerError)
2104 Log(fCurrentDetector.Data(), Form("GetValueSet - Server error code: %s",
2105 client.GetServerError().Data()));
a038aa70 2106
b41b252a 2107 return 0;
a038aa70 2108 }
b41b252a 2109
a038aa70 2110 return result;
2111}
b41b252a 2112
a038aa70 2113//______________________________________________________________________________________________
57f50b3c 2114const char* AliShuttle::GetFile(Int_t system, const char* detector,
2115 const char* id, const char* source)
b948db8d 2116{
9827400b 2117 // Get calibration file from file exchange servers
2118 // First queris the FXS database for the file name, using the run, detector, id and source info
2119 // then calls RetrieveFile(filename) for actual copy to local disk
2120 // run: current run being processed (given by Logbook entry fLogbookEntry)
2121 // detector: the Preprocessor name
2122 // id: provided as a parameter by the Preprocessor
2123 // source: provided by the Preprocessor through GetFileSources function
2124
2125 // check if test mode should simulate a FXS error
2126 if (fTestMode & kErrorFXSFiles)
2127 {
2128 Log(detector, Form("GetFile - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2129 return 0;
2130 }
2131
57f50b3c 2132 // check connection, in case connect
9d733021 2133 if (!Connect(system))
eba76848 2134 {
9d733021 2135 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
57f50b3c 2136 return 0;
2137 }
2138
2139 // Query preparation
9d733021 2140 TString sourceName(source);
d386d623 2141 Int_t nFields = 3;
2142 TString sqlQueryStart = Form("select filePath,size,fileChecksum from %s where",
2143 fConfig->GetFXSdbTable(system));
2144 TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
2145 GetCurrentRun(), detector, id);
2146
9d733021 2147 if (system == kDAQ)
2148 {
d386d623 2149 whereClause += Form(" and DAQsource=\"%s\"", source);
57f50b3c 2150 }
9d733021 2151 else if (system == kDCS)
eba76848 2152 {
9d733021 2153 sourceName="none";
57f50b3c 2154 }
9d733021 2155 else if (system == kHLT)
9e080f92 2156 {
d386d623 2157 whereClause += Form(" and DDLnumbers=\"%s\"", source);
9d733021 2158 nFields = 3;
9e080f92 2159 }
2160
9e080f92 2161 TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2162
2163 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2164
2165 // Query execution
2166 TSQLResult* aResult = 0;
9d733021 2167 aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
9e080f92 2168 if (!aResult) {
9d733021 2169 Log(detector, Form("GetFileName - Can't execute SQL query to %s database for: id = %s, source = %s",
2170 GetSystemName(system), id, sourceName.Data()));
9e080f92 2171 return 0;
2172 }
2173
2174 if(aResult->GetRowCount() == 0)
2175 {
2176 Log(detector,
9d733021 2177 Form("GetFileName - No entry in %s FXS db for: id = %s, source = %s",
2178 GetSystemName(system), id, sourceName.Data()));
9e080f92 2179 delete aResult;
2180 return 0;
2181 }
2bb7b766 2182
9e080f92 2183 if (aResult->GetRowCount() > 1) {
2184 Log(detector,
9d733021 2185 Form("GetFileName - More than one entry in %s FXS db for: id = %s, source = %s",
2186 GetSystemName(system), id, sourceName.Data()));
9e080f92 2187 delete aResult;
2188 return 0;
2189 }
2190
9d733021 2191 if (aResult->GetFieldCount() != nFields) {
9e080f92 2192 Log(detector,
9d733021 2193 Form("GetFileName - Wrong field count in %s FXS db for: id = %s, source = %s",
2194 GetSystemName(system), id, sourceName.Data()));
9e080f92 2195 delete aResult;
2196 return 0;
2197 }
2198
2199 TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
2200
2201 if (!aRow){
9d733021 2202 Log(detector, Form("GetFileName - Empty set result in %s FXS db from query: id = %s, source = %s",
2203 GetSystemName(system), id, sourceName.Data()));
9e080f92 2204 delete aResult;
2205 return 0;
2206 }
2207
2208 TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
2209 TString fileSize(aRow->GetField(1), aRow->GetFieldLength(1));
d386d623 2210 TString fileChecksum(aRow->GetField(2), aRow->GetFieldLength(2));
9e080f92 2211
2212 delete aResult;
2213 delete aRow;
2214
d386d623 2215 AliDebug(2, Form("filePath = %s; size = %s, fileChecksum = %s",
2216 filePath.Data(), fileSize.Data(), fileChecksum.Data()));
9e080f92 2217
9e080f92 2218 // retrieved file is renamed to make it unique
675f64cd 2219 TString localFileName = Form("%s/%s_%d_process/%s_%s_%d_%s_%s.shuttle",
2220 GetShuttleTempDir(), detector, GetCurrentRun(),
d524ade6 2221 GetSystemName(system), detector, GetCurrentRun(),
2222 id, sourceName.Data());
9d733021 2223
9e080f92 2224
9d733021 2225 // file retrieval from FXS
4b95672b 2226 UInt_t nRetries = 0;
2227 UInt_t maxRetries = 3;
2228 Bool_t result = kFALSE;
2229
2230 // copy!! if successful TSystem::Exec returns 0
2231 while(nRetries++ < maxRetries) {
2232 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
2233 result = RetrieveFile(system, filePath.Data(), localFileName.Data());
2234 if(!result)
2235 {
2236 Log(detector, Form("GetFileName - Copy of file %s from %s FXS failed",
9d733021 2237 filePath.Data(), GetSystemName(system)));
4b95672b 2238 continue;
4f0749a8 2239 }
9e080f92 2240
d386d623 2241 if (fileChecksum.Length()>0)
4b95672b 2242 {
2243 // compare md5sum of local file with the one stored in the FXS DB
d524ade6 2244 Int_t md5Comp = gSystem->Exec(Form("md5sum %s |grep %s 2>&1 > /dev/null",
2245 localFileName.Data(), fileChecksum.Data()));
9e080f92 2246
4b95672b 2247 if (md5Comp != 0)
2248 {
2249 Log(detector, Form("GetFileName - md5sum of file %s does not match with local copy!",
2250 filePath.Data()));
2251 result = kFALSE;
2252 continue;
2253 }
d386d623 2254 } else {
2255 Log(fCurrentDetector, Form("GetFile - md5sum of file %s not set in %s database, skipping comparison",
2256 filePath.Data(), GetSystemName(system)));
9d733021 2257 }
4b95672b 2258 if (result) break;
9e080f92 2259 }
2260
4b95672b 2261 if(!result) return 0;
2262
9d733021 2263 fFXSCalled[system]=kTRUE;
2264 TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, sourceName.Data()));
2265 fFXSlist[system].Add(fileParams);
9e080f92 2266
675f64cd 2267 static TString staticLocalFileName;
2268 staticLocalFileName.Form("%s", localFileName.Data());
2269
c88ad5db 2270 Log(fCurrentDetector, Form("GetFile - Retrieved file with id %s and "
2271 "source %s from %s to %s", id, source,
d524ade6 2272 GetSystemName(system), localFileName.Data()));
675f64cd 2273
d524ade6 2274 return staticLocalFileName.Data();
2bb7b766 2275}
2276
2277//______________________________________________________________________________________________
9d733021 2278Bool_t AliShuttle::RetrieveFile(UInt_t system, const char* fxsFileName, const char* localFileName)
9e080f92 2279{
9827400b 2280 //
2281 // Copies file from FXS to local Shuttle machine
2282 //
2bb7b766 2283
9e080f92 2284 // check temp directory: trying to cd to temp; if it does not exist, create it
d524ade6 2285 AliDebug(2, Form("Copy file %s from %s FXS into %s",
2286 GetSystemName(system), fxsFileName, localFileName));
2287
2288 TString tmpDir(localFileName);
2289
2290 tmpDir = tmpDir(0,tmpDir.Last('/'));
9e080f92 2291
d524ade6 2292 Int_t noDir = gSystem->GetPathInfo(tmpDir.Data(), 0, (Long64_t*) 0, 0, 0);
2293 if (noDir) // temp dir does not exists!
2294 {
2295 if (gSystem->mkdir(tmpDir.Data(), 1))
2296 {
2297 Log(fCurrentDetector.Data(), "RetrieveFile - could not make temp directory!!");
9e080f92 2298 return kFALSE;
2299 }
9e080f92 2300 }
2301
9d733021 2302 TString baseFXSFolder;
2303 if (system == kDAQ)
2304 {
2305 baseFXSFolder = "FES/";
2306 }
2307 else if (system == kDCS)
2308 {
2309 baseFXSFolder = "";
2310 }
2311 else if (system == kHLT)
2312 {
42fde080 2313 baseFXSFolder = "/opt/FXS/";
9d733021 2314 }
2315
2316
d524ade6 2317 TString command = Form("scp -oPort=%d -2 %s@%s:%s%s %s",
9d733021 2318 fConfig->GetFXSPort(system),
2319 fConfig->GetFXSUser(system),
2320 fConfig->GetFXSHost(system),
2321 baseFXSFolder.Data(),
2322 fxsFileName,
9e080f92 2323 localFileName);
2324
2325 AliDebug(2, Form("%s",command.Data()));
2326
4b95672b 2327 Bool_t result = (gSystem->Exec(command.Data()) == 0);
9e080f92 2328
4b95672b 2329 return result;
9e080f92 2330}
2331
2332//______________________________________________________________________________________________
9d733021 2333TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
2334{
9827400b 2335 //
2336 // Get sources producing the condition file Id from file exchange servers
4a33bdd9 2337 // if id is NULL all sources are returned (distinct)
9827400b 2338 //
1bcd28db 2339
2340 Log(detector, Form("GetFileSources - Retrieving sources with id %s from %s", id, GetSystemName(system)));
9827400b 2341
2342 // check if test mode should simulate a FXS error
2343 if (fTestMode & kErrorFXSSources)
2344 {
2345 Log(detector, Form("GetFileSources - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2346 return 0;
2347 }
2348
9d733021 2349 if (system == kDCS)
2350 {
c88ad5db 2351 Log(detector, "GetFileSources - WARNING: DCS system has only one source of data!");
6297b37d 2352 TList *list = new TList();
2353 list->SetOwner(1);
2354 list->Add(new TObjString(" "));
2355 return list;
9d733021 2356 }
9e080f92 2357
2358 // check connection, in case connect
9d733021 2359 if (!Connect(system))
2360 {
4a33bdd9 2361 Log(detector, Form("GetFileSources - Couldn't connect to %s FXS database", GetSystemName(system)));
9d733021 2362 return NULL;
9e080f92 2363 }
2364
9d733021 2365 TString sourceName = 0;
2366 if (system == kDAQ)
2367 {
2368 sourceName = "DAQsource";
2369 } else if (system == kHLT)
2370 {
2371 sourceName = "DDLnumbers";
2372 }
2373
4a33bdd9 2374 TString sqlQueryStart = Form("select distinct %s from %s where", sourceName.Data(), fConfig->GetFXSdbTable(system));
2375 TString whereClause = Form("run=%d and detector=\"%s\"",
2376 GetCurrentRun(), detector);
2377 if (id)
2378 whereClause += Form(" and fileId=\"%s\"", id);
9e080f92 2379 TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2380
2381 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2382
2383 // Query execution
2384 TSQLResult* aResult;
9d733021 2385 aResult = fServer[system]->Query(sqlQuery);
9e080f92 2386 if (!aResult) {
9d733021 2387 Log(detector, Form("GetFileSources - Can't execute SQL query to %s database for id: %s",
2388 GetSystemName(system), id));
9e080f92 2389 return 0;
2390 }
2391
86aa42c3 2392 TList *list = new TList();
2393 list->SetOwner(1);
2394
9d733021 2395 if (aResult->GetRowCount() == 0)
2396 {
9e080f92 2397 Log(detector,
9d733021 2398 Form("GetFileSources - No entry in %s FXS table for id: %s", GetSystemName(system), id));
9e080f92 2399 delete aResult;
86aa42c3 2400 return list;
9e080f92 2401 }
2402
1bcd28db 2403 Log(detector, Form("GetFileSources - Found %d sources", aResult->GetRowCount()));
9e080f92 2404
1bcd28db 2405 TSQLRow* aRow;
9d733021 2406 while ((aRow = aResult->Next()))
2407 {
9e080f92 2408
9d733021 2409 TString source(aRow->GetField(0), aRow->GetFieldLength(0));
2410 AliDebug(2, Form("%s = %s", sourceName.Data(), source.Data()));
2411 list->Add(new TObjString(source));
9e080f92 2412 delete aRow;
2413 }
9d733021 2414
9e080f92 2415 delete aResult;
2416
2417 return list;
2bb7b766 2418}
2419
2420//______________________________________________________________________________________________
4a33bdd9 2421TList* AliShuttle::GetFileIDs(Int_t system, const char* detector, const char* source)
2422{
2423 //
2424 // Get all ids of condition files produced by a given source from file exchange servers
2425 //
2426
1bcd28db 2427 Log(detector, Form("GetFileIDs - Retrieving ids with source %s with %s", source, GetSystemName(system)));
2428
4a33bdd9 2429 // check if test mode should simulate a FXS error
2430 if (fTestMode & kErrorFXSSources)
2431 {
2432 Log(detector, Form("GetFileIDs - In TESTMODE - Simulating error while connecting to %s FXS", GetSystemName(system)));
2433 return 0;
2434 }
2435
2436 // check connection, in case connect
2437 if (!Connect(system))
2438 {
2439 Log(detector, Form("GetFileIDs - Couldn't connect to %s FXS database", GetSystemName(system)));
2440 return NULL;
2441 }
2442
2443 TString sourceName = 0;
2444 if (system == kDAQ)
2445 {
2446 sourceName = "DAQsource";
2447 } else if (system == kHLT)
2448 {
2449 sourceName = "DDLnumbers";
2450 }
2451
2452 TString sqlQueryStart = Form("select fileId from %s where", fConfig->GetFXSdbTable(system));
2453 TString whereClause = Form("run=%d and detector=\"%s\"",
2454 GetCurrentRun(), detector);
2455 if (sourceName.Length() > 0 && source)
2456 whereClause += Form(" and %s=\"%s\"", sourceName.Data(), source);
2457 TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
2458
2459 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2460
2461 // Query execution
2462 TSQLResult* aResult;
2463 aResult = fServer[system]->Query(sqlQuery);
2464 if (!aResult) {
2465 Log(detector, Form("GetFileIDs - Can't execute SQL query to %s database for source: %s",
2466 GetSystemName(system), source));
2467 return 0;
2468 }
2469
2470 TList *list = new TList();
2471 list->SetOwner(1);
2472
2473 if (aResult->GetRowCount() == 0)
2474 {
2475 Log(detector,
2476 Form("GetFileIDs - No entry in %s FXS table for source: %s", GetSystemName(system), source));
2477 delete aResult;
2478 return list;
2479 }
2480
1bcd28db 2481 Log(detector, Form("GetFileIDs - Found %d ids", aResult->GetRowCount()));
2482
4a33bdd9 2483 TSQLRow* aRow;
2484
2485 while ((aRow = aResult->Next()))
2486 {
2487
2488 TString id(aRow->GetField(0), aRow->GetFieldLength(0));
2489 AliDebug(2, Form("fileId = %s", id.Data()));
2490 list->Add(new TObjString(id));
2491 delete aRow;
2492 }
2493
2494 delete aResult;
2495
2496 return list;
2497}
2498
2499//______________________________________________________________________________________________
9d733021 2500Bool_t AliShuttle::Connect(Int_t system)
2bb7b766 2501{
9827400b 2502 // Connect to MySQL Server of the system's FXS MySQL databases
2503 // DAQ Logbook, Shuttle Logbook and DAQ FXS db are on the same host
2504 //
57f50b3c 2505
9d733021 2506 // check connection: if already connected return
2507 if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
57f50b3c 2508
9d733021 2509 TString dbHost, dbUser, dbPass, dbName;
57f50b3c 2510
9d733021 2511 if (system < 3) // FXS db servers
2512 {
2513 dbHost = Form("mysql://%s:%d", fConfig->GetFXSdbHost(system), fConfig->GetFXSdbPort(system));
2514 dbUser = fConfig->GetFXSdbUser(system);
2515 dbPass = fConfig->GetFXSdbPass(system);
2516 dbName = fConfig->GetFXSdbName(system);
2517 } else { // Run & Shuttle logbook servers
2518 // TODO Will the Shuttle logbook server be the same as the Run logbook server ???
2519 dbHost = Form("mysql://%s:%d", fConfig->GetDAQlbHost(), fConfig->GetDAQlbPort());
2520 dbUser = fConfig->GetDAQlbUser();
2521 dbPass = fConfig->GetDAQlbPass();
2522 dbName = fConfig->GetDAQlbDB();
2523 }
57f50b3c 2524
9d733021 2525 fServer[system] = TSQLServer::Connect(dbHost.Data(), dbUser.Data(), dbPass.Data());
2526 if (!fServer[system] || !fServer[system]->IsConnected()) {
2527 if(system < 3)
2528 {
2529 AliError(Form("Can't establish connection to FXS database for %s",
2530 AliShuttleInterface::GetSystemName(system)));
2531 } else {
2532 AliError("Can't establish connection to Run logbook.");
57f50b3c 2533 }
9d733021 2534 if(fServer[system]) delete fServer[system];
2535 return kFALSE;
2bb7b766 2536 }
57f50b3c 2537
9d733021 2538 // Get tables
2539 TSQLResult* aResult=0;
2540 switch(system){
2541 case kDAQ:
2542 aResult = fServer[kDAQ]->GetTables(dbName.Data());
2543 break;
2544 case kDCS:
2545 aResult = fServer[kDCS]->GetTables(dbName.Data());
2546 break;
2547 case kHLT:
2548 aResult = fServer[kHLT]->GetTables(dbName.Data());
2549 break;
2550 default:
2551 aResult = fServer[3]->GetTables(dbName.Data());
2552 break;
2553 }
2554
2555 delete aResult;
2bb7b766 2556 return kTRUE;
2557}
57f50b3c 2558
9e080f92 2559//______________________________________________________________________________________________
9d733021 2560Bool_t AliShuttle::UpdateTable()
9e080f92 2561{
9827400b 2562 //
2563 // Update FXS table filling time_processed field in all rows corresponding to current run and detector
2564 //
9e080f92 2565
9d733021 2566 Bool_t result = kTRUE;
9e080f92 2567
9d733021 2568 for (UInt_t system=0; system<3; system++)
2569 {
2570 if(!fFXSCalled[system]) continue;
9e080f92 2571
9d733021 2572 // check connection, in case connect
2573 if (!Connect(system))
2574 {
2575 Log(fCurrentDetector, Form("UpdateTable - Couldn't connect to %s FXS database", GetSystemName(system)));
2576 result = kFALSE;
2577 continue;
9e080f92 2578 }
9e080f92 2579
9d733021 2580 TTimeStamp now; // now
2581
2582 // Loop on FXS list entries
2583 TIter iter(&fFXSlist[system]);
2584 TObjString *aFXSentry=0;
2585 while ((aFXSentry = dynamic_cast<TObjString*> (iter.Next())))
2586 {
2587 TString aFXSentrystr = aFXSentry->String();
2588 TObjArray *aFXSarray = aFXSentrystr.Tokenize("#!?!#");
2589 if (!aFXSarray || aFXSarray->GetEntries() != 2 )
2590 {
2591 Log(fCurrentDetector, Form("UpdateTable - error updating %s FXS entry. Check string: <%s>",
2592 GetSystemName(system), aFXSentrystr.Data()));
2593 if(aFXSarray) delete aFXSarray;
2594 result = kFALSE;
2595 continue;
2596 }
2597 const char* fileId = ((TObjString*) aFXSarray->At(0))->GetName();
2598 const char* source = ((TObjString*) aFXSarray->At(1))->GetName();
2599
2600 TString whereClause;
2601 if (system == kDAQ)
2602 {
2603 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
2604 GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
2605 }
2606 else if (system == kDCS)
2607 {
2608 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\";",
2609 GetCurrentRun(), fCurrentDetector.Data(), fileId);
2610 }
2611 else if (system == kHLT)
2612 {
2613 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\";",
2614 GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
2615 }
2616
2617 delete aFXSarray;
9e080f92 2618
9d733021 2619 TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
2620 now.GetSec(), whereClause.Data());
9e080f92 2621
9d733021 2622 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
9e080f92 2623
9d733021 2624 // Query execution
2625 TSQLResult* aResult;
2626 aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2627 if (!aResult)
2628 {
2629 Log(fCurrentDetector, Form("UpdateTable - %s db: can't execute SQL query <%s>",
2630 GetSystemName(system), sqlQuery.Data()));
2631 result = kFALSE;
2632 continue;
2633 }
2634 delete aResult;
9e080f92 2635 }
9e080f92 2636 }
2637
9d733021 2638 return result;
9e080f92 2639}
57f50b3c 2640
2bb7b766 2641//______________________________________________________________________________________________
3301427a 2642Bool_t AliShuttle::UpdateTableFailCase()
2643{
9827400b 2644 // Update FXS table filling time_processed field in all rows corresponding to current run and detector
2645 // this is called in case the preprocessor is declared failed for the current run, because
2646 // the fields are updated only in case of success
3301427a 2647
2648 Bool_t result = kTRUE;
2649
2650 for (UInt_t system=0; system<3; system++)
2651 {
2652 // check connection, in case connect
2653 if (!Connect(system))
2654 {
2655 Log(fCurrentDetector, Form("UpdateTableFailCase - Couldn't connect to %s FXS database",
2656 GetSystemName(system)));
2657 result = kFALSE;
2658 continue;
2659 }
2660
2661 TTimeStamp now; // now
2662
2663 // Loop on FXS list entries
2664
2665 TString whereClause = Form("where run=%d and detector=\"%s\";",
2666 GetCurrentRun(), fCurrentDetector.Data());
2667
2668
2669 TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
2670 now.GetSec(), whereClause.Data());
2671
2672 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2673
2674 // Query execution
2675 TSQLResult* aResult;
2676 aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
2677 if (!aResult)
2678 {
2679 Log(fCurrentDetector, Form("UpdateTableFailCase - %s db: can't execute SQL query <%s>",
2680 GetSystemName(system), sqlQuery.Data()));
2681 result = kFALSE;
2682 continue;
2683 }
2684 delete aResult;
2685 }
2686
2687 return result;
2688}
2689
2690//______________________________________________________________________________________________
2bb7b766 2691Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
2692{
e7f62f16 2693 //
2694 // Update Shuttle logbook filling detector or shuttle_done column
2695 // ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
2696 //
57f50b3c 2697
2bb7b766 2698 // check connection, in case connect
be48e3ea 2699 if(!Connect(3)){
2bb7b766 2700 Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook.");
2701 return kFALSE;
57f50b3c 2702 }
2703
2bb7b766 2704 TString detName(detector);
2705 TString setClause;
e7f62f16 2706 if(detName == "shuttle_done")
2707 {
2bb7b766 2708 setClause = "set shuttle_done=1";
e7f62f16 2709
b0e53b15 2710 if (fMonaLisa)
2711 {
2712 // Send the information to ML
2713 TMonaLisaText mlStatus("SHUTTLE_status", "Done");
e7f62f16 2714
b0e53b15 2715 TList mlList;
2716 mlList.Add(&mlStatus);
2717
2718 fMonaLisa->SendParameters(&mlList);
2719 }
2bb7b766 2720 } else {
2bb7b766 2721 TString statusStr(status);
2722 if(statusStr.Contains("done", TString::kIgnoreCase) ||
2723 statusStr.Contains("failed", TString::kIgnoreCase)){
eba76848 2724 setClause = Form("set %s=\"%s\"", detector, status);
2bb7b766 2725 } else {
2726 Log("SHUTTLE",
2727 Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s",
2728 status, detector));
2729 return kFALSE;
2730 }
2731 }
57f50b3c 2732
2bb7b766 2733 TString whereClause = Form("where run=%d", GetCurrentRun());
2734
441b0e9c 2735 TString sqlQuery = Form("update %s %s %s",
2736 fConfig->GetShuttlelbTable(), setClause.Data(), whereClause.Data());
57f50b3c 2737
2bb7b766 2738 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2739
2740 // Query execution
2741 TSQLResult* aResult;
be48e3ea 2742 aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
2bb7b766 2743 if (!aResult) {
2744 Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data()));
2745 return kFALSE;
57f50b3c 2746 }
2bb7b766 2747 delete aResult;
57f50b3c 2748
2749 return kTRUE;
2750}
2751
2752//______________________________________________________________________________________________
2bb7b766 2753Int_t AliShuttle::GetCurrentRun() const
2754{
9827400b 2755 //
2756 // Get current run from logbook entry
2757 //
57f50b3c 2758
2bb7b766 2759 return fLogbookEntry ? fLogbookEntry->GetRun() : -1;
57f50b3c 2760}
2761
2762//______________________________________________________________________________________________
2bb7b766 2763UInt_t AliShuttle::GetCurrentStartTime() const
2764{
9827400b 2765 //
2766 // get current start time
2767 //
57f50b3c 2768
2bb7b766 2769 return fLogbookEntry ? fLogbookEntry->GetStartTime() : 0;
57f50b3c 2770}
2771
2772//______________________________________________________________________________________________
2bb7b766 2773UInt_t AliShuttle::GetCurrentEndTime() const
2774{
9827400b 2775 //
2776 // get current end time from logbook entry
2777 //
57f50b3c 2778
2bb7b766 2779 return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
57f50b3c 2780}
2781
2782//______________________________________________________________________________________________
675f64cd 2783UInt_t AliShuttle::GetCurrentYear() const
2784{
2785 //
2786 // Get current year from logbook entry
2787 //
2788
2789 if (!fLogbookEntry) return 0;
2790
2791 TTimeStamp startTime(GetCurrentStartTime());
2792 TString year = Form("%d",startTime.GetDate());
2793 year = year(0,4);
2794
2795 return year.Atoi();
2796}
2797
2798//______________________________________________________________________________________________
2799const char* AliShuttle::GetLHCPeriod() const
2800{
2801 //
2802 // Get current LHC period from logbook entry
2803 //
2804
2805 if (!fLogbookEntry) return 0;
2806
2807 return fLogbookEntry->GetRunParameter("LHCperiod");
2808}
2809
2810//______________________________________________________________________________________________
b948db8d 2811void AliShuttle::Log(const char* detector, const char* message)
2812{
9827400b 2813 //
2814 // Fill log string with a message
2815 //
b948db8d 2816
7d4cf768 2817 TString logRunDir = GetShuttleLogDir();
2818 if (GetCurrentRun() >=0)
2819 logRunDir += Form("/%d", GetCurrentRun());
2820
2821 void* dir = gSystem->OpenDirectory(logRunDir.Data());
84090f85 2822 if (dir == NULL) {
7d4cf768 2823 if (gSystem->mkdir(logRunDir.Data(), kTRUE)) {
36c99a6a 2824 AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
84090f85 2825 return;
2826 }
b948db8d 2827
84090f85 2828 } else {
2829 gSystem->FreeDirectory(dir);
2830 }
b948db8d 2831
cb343cfd 2832 TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
e7f62f16 2833 if (GetCurrentRun() >= 0)
2834 toLog += Form("run %d - ", GetCurrentRun());
2bb7b766 2835 toLog += Form("%s", message);
2836
84090f85 2837 AliInfo(toLog.Data());
ffa29e93 2838
2839 // if we redirect the log output already to the file, leave here
2840 if (fOutputRedirected && strcmp(detector, "SHUTTLE") != 0)
2841 return;
b948db8d 2842
ffa29e93 2843 TString fileName = GetLogFileName(detector);
e7f62f16 2844
84090f85 2845 gSystem->ExpandPathName(fileName);
2846
2847 ofstream logFile;
2848 logFile.open(fileName, ofstream::out | ofstream::app);
2849
2850 if (!logFile.is_open()) {
2851 AliError(Form("Could not open file %s", fileName.Data()));
2852 return;
2853 }
7bfb2090 2854
84090f85 2855 logFile << toLog.Data() << "\n";
b948db8d 2856
84090f85 2857 logFile.close();
b948db8d 2858}
2bb7b766 2859
2bb7b766 2860//______________________________________________________________________________________________
ffa29e93 2861TString AliShuttle::GetLogFileName(const char* detector) const
2862{
2863 //
2864 // returns the name of the log file for a given sub detector
2865 //
2866
2867 TString fileName;
2868
2869 if (GetCurrentRun() >= 0)
7d4cf768 2870 {
2871 fileName.Form("%s/%d/%s_%d.log", GetShuttleLogDir(), GetCurrentRun(),
2872 detector, GetCurrentRun());
2873 } else {
ffa29e93 2874 fileName.Form("%s/%s.log", GetShuttleLogDir(), detector);
7d4cf768 2875 }
ffa29e93 2876
2877 return fileName;
2878}
2879
2880//______________________________________________________________________________________________
2bb7b766 2881Bool_t AliShuttle::Collect(Int_t run)
2882{
9827400b 2883 //
2884 // Collects conditions data for all UNPROCESSED run written to DAQ LogBook in case of run = -1 (default)
2885 // If a dedicated run is given this run is processed
2886 //
2887 // In operational mode, this is the Shuttle function triggered by the EOR signal.
2888 //
2bb7b766 2889
eba76848 2890 if (run == -1)
2891 Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs");
2892 else
2893 Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run));
cb343cfd 2894
2895 SetLastAction("Starting");
2bb7b766 2896
2897 TString whereClause("where shuttle_done=0");
eba76848 2898 if (run != -1)
2899 whereClause += Form(" and run=%d", run);
2bb7b766 2900
2901 TObjArray shuttleLogbookEntries;
be48e3ea 2902 if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries))
2903 {
cb343cfd 2904 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
2bb7b766 2905 return kFALSE;
2906 }
2907
9e080f92 2908 if (shuttleLogbookEntries.GetEntries() == 0)
2909 {
2910 if (run == -1)
2911 Log("SHUTTLE","Collect - Found no UNPROCESSED runs in Shuttle logbook");
2912 else
2913 Log("SHUTTLE", Form("Collect - Run %d is already DONE "
2914 "or it does not exist in Shuttle logbook", run));
2915 return kTRUE;
2916 }
2917
be48e3ea 2918 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
2919 fFirstUnprocessed[iDet] = kTRUE;
2920
fc5a4708 2921 if (run != -1)
be48e3ea 2922 {
2923 // query Shuttle logbook for earlier runs, check if some detectors are unprocessed,
2924 // flag them into fFirstUnprocessed array
2925 TString whereClause(Form("where shuttle_done=0 and run < %d", run));
2926 TObjArray tmpLogbookEntries;
2927 if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries))
2928 {
2929 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
2930 return kFALSE;
2931 }
2932
2933 TIter iter(&tmpLogbookEntries);
2934 AliShuttleLogbookEntry* anEntry = 0;
2935 while ((anEntry = dynamic_cast<AliShuttleLogbookEntry*> (iter.Next())))
2936 {
2937 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
2938 {
2939 if (anEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
2940 {
2941 AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
2942 anEntry->GetRun(), GetDetName(iDet)));
2943 fFirstUnprocessed[iDet] = kFALSE;
2944 }
2945 }
2946
2947 }
2948
2949 }
2950
2951 if (!RetrieveConditionsData(shuttleLogbookEntries))
2952 {
cb343cfd 2953 Log("SHUTTLE", "Collect - Process of at least one run failed");
2bb7b766 2954 return kFALSE;
2955 }
2956
36c99a6a 2957 Log("SHUTTLE", "Collect - Requested run(s) successfully processed");
eba76848 2958 return kTRUE;
2bb7b766 2959}
2960
2bb7b766 2961//______________________________________________________________________________________________
2962Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries)
2963{
9827400b 2964 //
2965 // Retrieve conditions data for all runs that aren't processed yet
2966 //
2bb7b766 2967
2968 Bool_t hasError = kFALSE;
2969
2970 TIter iter(&dateEntries);
2971 AliShuttleLogbookEntry* anEntry;
2972
2973 while ((anEntry = (AliShuttleLogbookEntry*) iter.Next())){
2974 if (!Process(anEntry)){
2975 hasError = kTRUE;
2976 }
4b95672b 2977
2978 // clean SHUTTLE temp directory
d524ade6 2979 //TString filename = Form("%s/*.shuttle", GetShuttleTempDir());
2980 //RemoveFile(filename.Data());
2bb7b766 2981 }
2982
2983 return hasError == kFALSE;
2984}
cb343cfd 2985
2986//______________________________________________________________________________________________
2987ULong_t AliShuttle::GetTimeOfLastAction() const
2988{
9827400b 2989 //
2990 // Gets time of last action
2991 //
2992
cb343cfd 2993 ULong_t tmp;
36c99a6a 2994
cb343cfd 2995 fMonitoringMutex->Lock();
be48e3ea 2996
cb343cfd 2997 tmp = fLastActionTime;
36c99a6a 2998
cb343cfd 2999 fMonitoringMutex->UnLock();
36c99a6a 3000
cb343cfd 3001 return tmp;
3002}
3003
3004//______________________________________________________________________________________________
3005const TString AliShuttle::GetLastAction() const
3006{
9827400b 3007 //
cb343cfd 3008 // returns a string description of the last action
9827400b 3009 //
cb343cfd 3010
3011 TString tmp;
36c99a6a 3012
cb343cfd 3013 fMonitoringMutex->Lock();
3014
3015 tmp = fLastAction;
3016
3017 fMonitoringMutex->UnLock();
3018
36c99a6a 3019 return tmp;
cb343cfd 3020}
3021
3022//______________________________________________________________________________________________
3023void AliShuttle::SetLastAction(const char* action)
3024{
9827400b 3025 //
cb343cfd 3026 // updates the monitoring variables
9827400b 3027 //
36c99a6a 3028
cb343cfd 3029 fMonitoringMutex->Lock();
36c99a6a 3030
cb343cfd 3031 fLastAction = action;
3032 fLastActionTime = time(0);
3033
3034 fMonitoringMutex->UnLock();
3035}
eba76848 3036
3037//______________________________________________________________________________________________
3038const char* AliShuttle::GetRunParameter(const char* param)
3039{
9827400b 3040 //
3041 // returns run parameter read from DAQ logbook
3042 //
eba76848 3043
3044 if(!fLogbookEntry) {
3045 AliError("No logbook entry!");
3046 return 0;
3047 }
3048
3049 return fLogbookEntry->GetRunParameter(param);
3050}
57c1a579 3051
3052//______________________________________________________________________________________________
9827400b 3053AliCDBEntry* AliShuttle::GetFromOCDB(const char* detector, const AliCDBPath& path)
d386d623 3054{
9827400b 3055 //
3056 // returns object from OCDB valid for current run
3057 //
d386d623 3058
9827400b 3059 if (fTestMode & kErrorOCDB)
3060 {
3061 Log(detector, "GetFromOCDB - In TESTMODE - Simulating error with OCDB");
3062 return 0;
3063 }
3064
d386d623 3065 AliCDBStorage *sto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
3066 if (!sto)
3067 {
9827400b 3068 Log(detector, "GetFromOCDB - Cannot activate main OCDB for query!");
d386d623 3069 return 0;
3070 }
3071
3072 return dynamic_cast<AliCDBEntry*> (sto->Get(path, GetCurrentRun()));
3073}
3074
3075//______________________________________________________________________________________________
57c1a579 3076Bool_t AliShuttle::SendMail()
3077{
9827400b 3078 //
3079 // sends a mail to the subdetector expert in case of preprocessor error
3080 //
3081
3082 if (fTestMode != kNone)
3083 return kTRUE;
57c1a579 3084
36c99a6a 3085 void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
57c1a579 3086 if (dir == NULL)
3087 {
36c99a6a 3088 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
57c1a579 3089 {
675f64cd 3090 Log("SHUTTLE", Form("SendMail - Can't open directory <%s>", GetShuttleLogDir()));
57c1a579 3091 return kFALSE;
3092 }
3093
3094 } else {
3095 gSystem->FreeDirectory(dir);
3096 }
3097
3098 TString bodyFileName;
36c99a6a 3099 bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
57c1a579 3100 gSystem->ExpandPathName(bodyFileName);
3101
3102 ofstream mailBody;
3103 mailBody.open(bodyFileName, ofstream::out);
3104
3105 if (!mailBody.is_open())
3106 {
675f64cd 3107 Log("SHUTTLE", Form("Could not open mail body file %s", bodyFileName.Data()));
57c1a579 3108 return kFALSE;
3109 }
3110
3111 TString to="";
3112 TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
3113 TObjString *anExpert=0;
3114 while ((anExpert = (TObjString*) iterExperts.Next()))
3115 {
3116 to += Form("%s,", anExpert->GetName());
3117 }
3118 to.Remove(to.Length()-1);
909732f7 3119 AliDebug(2, Form("to: %s",to.Data()));
57c1a579 3120
86aa42c3 3121 if (to.IsNull()) {
c88ad5db 3122 Log("SHUTTLE", "List of detector responsibles not yet set!");
36c99a6a 3123 return kFALSE;
3124 }
3125
57c1a579 3126 TString cc="alberto.colla@cern.ch";
3127
546242fb 3128 TString subject = Form("%s Shuttle preprocessor FAILED in run %d !",
57c1a579 3129 fCurrentDetector.Data(), GetCurrentRun());
909732f7 3130 AliDebug(2, Form("subject: %s", subject.Data()));
57c1a579 3131
3132 TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
3133 body += Form("SHUTTLE just detected that your preprocessor "
546242fb 3134 "failed processing run %d!!\n\n", GetCurrentRun());
7d4cf768 3135 body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n",
3136 fCurrentDetector.Data());
b0e53b15 3137 if (fConfig->GetRunMode() == AliShuttleConfig::kTest)
3138 {
3139 body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
3140 } else {
3141 body += Form("\thttp://pcalimonitor.cern.ch/shuttle.jsp?instance=PROD?time=168 \n\n");
3142 }
3143
7d4cf768 3144
3145 TString logFolder = "logs";
3146 if (fConfig->GetRunMode() == AliShuttleConfig::kProd)
3147 logFolder += "_PROD";
3148
3149
546242fb 3150 body += Form("Find the %s log for the current run on \n\n"
7d4cf768 3151 "\thttp://pcalishuttle01.cern.ch:8880/%s/%d/%s_%d.log \n\n",
3152 fCurrentDetector.Data(), logFolder.Data(), GetCurrentRun(),
3153 fCurrentDetector.Data(), GetCurrentRun());
3154 body += Form("The last 10 lines of %s log file are following:\n\n", fCurrentDetector.Data());
57c1a579 3155
909732f7 3156 AliDebug(2, Form("Body begin: %s", body.Data()));