]> git.uio.no Git - u/mrichter/AliRoot.git/blame - SHUTTLE/AliShuttle.cxx
Record changes.
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttle.cxx
CommitLineData
73abe331 1/**************************************************************************
2 * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
3 * *
4 * Author: The ALICE Off-line Project. *
5 * Contributors are mentioned in the code where appropriate. *
6 * *
7 * Permission to use, copy, modify and distribute this software and its *
8 * documentation strictly for non-commercial purposes is hereby granted *
9 * without fee, provided that the above copyright notice appears in all *
10 * copies and that both the copyright notice and this permission notice *
11 * appear in the supporting documentation. The authors make no claims *
12 * about the suitability of this software for any purpose. It is *
13 * provided "as is" without express or implied warranty. *
14 **************************************************************************/
15
16/*
17$Log$
d386d623 18Revision 1.30 2007/02/13 11:23:21 acolla
19Moved getters and setters of Shuttle's main OCDB/Reference, local
20OCDB/Reference, temp and log folders to AliShuttleInterface
21
9d733021 22Revision 1.27 2007/01/30 17:52:42 jgrosseo
23adding monalisa monitoring
24
e7f62f16 25Revision 1.26 2007/01/23 19:20:03 acolla
26Removed old ldif files, added TOF, MCH ldif files. Added some options in
27AliShuttleConfig::Print. Added in Ali Shuttle: SetShuttleTempDir and
28SetShuttleLogDir
29
36c99a6a 30Revision 1.25 2007/01/15 19:13:52 acolla
31Moved some AliInfo to AliDebug in SendMail function
32
fc5a4708 33Revision 1.21 2006/12/07 08:51:26 jgrosseo
34update (alberto):
35table, db names in ldap configuration
36added GRP preprocessor
37DCS data can also be retrieved by data point
38
2c15234c 39Revision 1.20 2006/11/16 16:16:48 jgrosseo
40introducing strict run ordering flag
41removed giving preprocessor name to preprocessor, they have to know their name themselves ;-)
42
be48e3ea 43Revision 1.19 2006/11/06 14:23:04 jgrosseo
44major update (Alberto)
45o) reading of run parameters from the logbook
46o) online offline naming conversion
47o) standalone DCSclient package
48
eba76848 49Revision 1.18 2006/10/20 15:22:59 jgrosseo
50o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
51o) Merging Collect, CollectAll, CollectNew function
52o) Removing implementation of empty copy constructors (declaration still there!)
53
cb343cfd 54Revision 1.17 2006/10/05 16:20:55 jgrosseo
55adapting to new CDB classes
56
6ec0e06c 57Revision 1.16 2006/10/05 15:46:26 jgrosseo
58applying to the new interface
59
481441a2 60Revision 1.15 2006/10/02 16:38:39 jgrosseo
61update (alberto):
62fixed memory leaks
63storing of objects that failed to be stored to the grid before
64interfacing of shuttle status table in daq system
65
2bb7b766 66Revision 1.14 2006/08/29 09:16:05 jgrosseo
67small update
68
85a80aa9 69Revision 1.13 2006/08/15 10:50:00 jgrosseo
70effc++ corrections (alberto)
71
4f0ab988 72Revision 1.12 2006/08/08 14:19:29 jgrosseo
73Update to shuttle classes (Alberto)
74
75- Possibility to set the full object's path in the Preprocessor's and
76Shuttle's Store functions
77- Possibility to extend the object's run validity in the same classes
78("startValidity" and "validityInfinite" parameters)
79- Implementation of the StoreReferenceData function to store reference
80data in a dedicated CDB storage.
81
84090f85 82Revision 1.11 2006/07/21 07:37:20 jgrosseo
83last run is stored after each run
84
7bfb2090 85Revision 1.10 2006/07/20 09:54:40 jgrosseo
86introducing status management: The processing per subdetector is divided into several steps,
87after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
88can keep track of the number of failures and skips further processing after a certain threshold is
89exceeded. These thresholds can be configured in LDAP.
90
5164a766 91Revision 1.9 2006/07/19 10:09:55 jgrosseo
92new configuration, accesst to DAQ FES (Alberto)
93
57f50b3c 94Revision 1.8 2006/07/11 12:44:36 jgrosseo
95adding parameters for extended validity range of data produced by preprocessor
96
17111222 97Revision 1.7 2006/07/10 14:37:09 jgrosseo
98small fix + todo comment
99
e090413b 100Revision 1.6 2006/07/10 13:01:41 jgrosseo
101enhanced storing of last sucessfully processed run (alberto)
102
a7160fe9 103Revision 1.5 2006/07/04 14:59:57 jgrosseo
104revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
105
45a493ce 106Revision 1.4 2006/06/12 09:11:16 jgrosseo
107coding conventions (Alberto)
108
58bc3020 109Revision 1.3 2006/06/06 14:26:40 jgrosseo
110o) removed files that were moved to STEER
111o) shuttle updated to follow the new interface (Alberto)
112
b948db8d 113Revision 1.2 2006/03/07 07:52:34 hristov
114New version (B.Yordanov)
115
d477ad88 116Revision 1.6 2005/11/19 17:19:14 byordano
117RetrieveDATEEntries and RetrieveConditionsData added
118
119Revision 1.5 2005/11/19 11:09:27 byordano
120AliShuttle declaration added
121
122Revision 1.4 2005/11/17 17:47:34 byordano
123TList changed to TObjArray
124
125Revision 1.3 2005/11/17 14:43:23 byordano
126import to local CVS
127
128Revision 1.1.1.1 2005/10/28 07:33:58 hristov
129Initial import as subdirectory in AliRoot
130
73abe331 131Revision 1.2 2005/09/13 08:41:15 byordano
132default startTime endTime added
133
134Revision 1.4 2005/08/30 09:13:02 byordano
135some docs added
136
137Revision 1.3 2005/08/29 21:15:47 byordano
138some docs added
139
140*/
141
//
// This class is the main manager for AliShuttle.
// It organizes the data retrieval from DCS and calls the
// interface methods of AliPreprocessor.
// For every detector in AliShuttleConfig (see AliShuttleConfig),
// data for its set of aliases is retrieved. If there is a registered
// AliPreprocessor for this detector then it will be used
// according to the schema (see AliPreprocessor).
// If there isn't a registered AliPreprocessor then the retrieved
// data is stored automatically to the underlying AliCDBStorage.
// The alias name is used as detSpec.
//
154
155#include "AliShuttle.h"
156
157#include "AliCDBManager.h"
158#include "AliCDBStorage.h"
159#include "AliCDBId.h"
84090f85 160#include "AliCDBRunRange.h"
161#include "AliCDBPath.h"
5164a766 162#include "AliCDBEntry.h"
73abe331 163#include "AliShuttleConfig.h"
eba76848 164#include "DCSClient/AliDCSClient.h"
73abe331 165#include "AliLog.h"
b948db8d 166#include "AliPreprocessor.h"
5164a766 167#include "AliShuttleStatus.h"
2bb7b766 168#include "AliShuttleLogbookEntry.h"
73abe331 169
57f50b3c 170#include <TSystem.h>
58bc3020 171#include <TObject.h>
b948db8d 172#include <TString.h>
57f50b3c 173#include <TTimeStamp.h>
73abe331 174#include <TObjString.h>
57f50b3c 175#include <TSQLServer.h>
176#include <TSQLResult.h>
177#include <TSQLRow.h>
cb343cfd 178#include <TMutex.h>
73abe331 179
e7f62f16 180#include <TMonaLisaWriter.h>
181
5164a766 182#include <fstream>
183
cb343cfd 184#include <sys/types.h>
185#include <sys/wait.h>
186
73abe331 187ClassImp(AliShuttle)
188
10a5a932 189Bool_t AliShuttle::fgkProcessDCS(kTRUE);
57f50b3c 190
b948db8d 191//______________________________________________________________________________________________
192AliShuttle::AliShuttle(const AliShuttleConfig* config,
193 UInt_t timeout, Int_t retries):
4f0ab988 194fConfig(config),
195fTimeout(timeout), fRetries(retries),
196fPreprocessorMap(),
2bb7b766 197fLogbookEntry(0),
eba76848 198fCurrentDetector(),
85a80aa9 199fStatusEntry(0),
cb343cfd 200fGridError(kFALSE),
201fMonitoringMutex(0),
eba76848 202fLastActionTime(0),
e7f62f16 203fLastAction(),
204fMonaLisa(0)
73abe331 205{
206 //
207 // config: AliShuttleConfig used
73abe331 208 // timeout: timeout used for AliDCSClient connection
209 // retries: the number of retries in case of connection error.
210 //
211
57f50b3c 212 if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
be48e3ea 213 for(int iSys=0;iSys<4;iSys++) {
57f50b3c 214 fServer[iSys]=0;
be48e3ea 215 if (iSys < 3)
2c15234c 216 fFXSlist[iSys].SetOwner(kTRUE);
57f50b3c 217 }
2bb7b766 218 fPreprocessorMap.SetOwner(kTRUE);
be48e3ea 219
220 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
221 fFirstUnprocessed[iDet] = kFALSE;
222
cb343cfd 223 fMonitoringMutex = new TMutex();
58bc3020 224}
225
b948db8d 226//______________________________________________________________________________________________
57f50b3c 227AliShuttle::~AliShuttle()
58bc3020 228{
229// destructor
230
b948db8d 231 fPreprocessorMap.DeleteAll();
be48e3ea 232 for(int iSys=0;iSys<4;iSys++)
57f50b3c 233 if(fServer[iSys]) {
234 fServer[iSys]->Close();
235 delete fServer[iSys];
eba76848 236 fServer[iSys] = 0;
57f50b3c 237 }
2bb7b766 238
239 if (fStatusEntry){
240 delete fStatusEntry;
241 fStatusEntry = 0;
242 }
cb343cfd 243
244 if (fMonitoringMutex)
245 {
246 delete fMonitoringMutex;
247 fMonitoringMutex = 0;
248 }
73abe331 249}
250
b948db8d 251//______________________________________________________________________________________________
57f50b3c 252void AliShuttle::RegisterPreprocessor(AliPreprocessor* preprocessor)
58bc3020 253{
73abe331 254 //
b948db8d 255 // Registers new AliPreprocessor.
73abe331 256 // It uses GetName() for indentificator of the pre processor.
257 // The pre processor is registered it there isn't any other
258 // with the same identificator (GetName()).
259 //
260
eba76848 261 const char* detName = preprocessor->GetName();
262 if(GetDetPos(detName) < 0)
263 AliFatal(Form("********** !!!!! Invalid detector name: %s !!!!! **********", detName));
264
265 if (fPreprocessorMap.GetValue(detName)) {
266 AliWarning(Form("AliPreprocessor %s is already registered!", detName));
73abe331 267 return;
268 }
269
eba76848 270 fPreprocessorMap.Add(new TObjString(detName), preprocessor);
73abe331 271}
b948db8d 272//______________________________________________________________________________________________
84090f85 273UInt_t AliShuttle::Store(const AliCDBPath& path, TObject* object,
274 AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite)
73abe331 275{
84090f85 276 // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for
277 // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored
278 // using this function. Use StoreReferenceData instead!
85a80aa9 279 // It calls WriteToCDB function which perform actual storage
b948db8d 280
85a80aa9 281 return WriteToCDB(fgkMainCDB, fgkLocalCDB, path, object,
282 metaData, validityStart, validityInfinite);
84090f85 283
284}
285
286//______________________________________________________________________________________________
481441a2 287UInt_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData)
84090f85 288{
289 // Stores a CDB object in the storage for reference data. This objects will not be available during
290 // offline reconstrunction. Use this function for reference data only!
85a80aa9 291 // It calls WriteToCDB function which perform actual storage
292
481441a2 293 return WriteToCDB(fgkMainRefStorage, fgkLocalRefStorage, path, object, metaData);
84090f85 294
85a80aa9 295}
296
297//______________________________________________________________________________________________
298UInt_t AliShuttle::WriteToCDB(const char* mainUri, const char* localUri,
299 const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
300 Int_t validityStart, Bool_t validityInfinite)
301{
302 // write object into the CDB. Parameters are passed by Store and StoreReferenceData functions.
303 // The parameters are:
304 // 1) Uri of the main storage (Grid)
305 // 2) Uri of the backup storage (Local)
306 // 3) the object's path.
307 // 4) the object to be stored
308 // 5) the metaData to be associated with the object
309 // 6) the validity start run number w.r.t. the current run,
84090f85 310 // if the data is valid only for this run leave the default 0
85a80aa9 311 // 7) specifies if the calibration data is valid for infinity (this means until updated),
84090f85 312 // typical for calibration runs, the default is kFALSE
313 //
84090f85 314 // returns 0 if fail
85a80aa9 315 // 1 if stored in main (Grid) storage
316 // 2 if stored in backup (Local) storage
84090f85 317
85a80aa9 318 const char* cdbType = (mainUri == fgkMainCDB) ? "CDB" : "Reference";
2bb7b766 319
85a80aa9 320 Int_t firstRun = GetCurrentRun() - validityStart;
84090f85 321 if(firstRun < 0) {
2bb7b766 322 AliError("First valid run happens to be less than 0! Setting it to 0.");
84090f85 323 firstRun=0;
324 }
325
326 Int_t lastRun = -1;
327 if(validityInfinite) {
328 lastRun = AliCDBRunRange::Infinity();
329 } else {
330 lastRun = GetCurrentRun();
331 }
332
2bb7b766 333 AliCDBId id(path, firstRun, lastRun, -1, -1);
334
335 if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){
336 TObjString runUsed = Form("%d", GetCurrentRun());
9e080f92 337 metaData->SetProperty("RunUsed(TObjString)", runUsed.Clone());
2bb7b766 338 }
84090f85 339
340 UInt_t result = 0;
341
85a80aa9 342 if (!(AliCDBManager::Instance()->GetStorage(mainUri))) {
2bb7b766 343 AliError(Form("WriteToCDB - Cannot activate main %s storage", cdbType));
84090f85 344 } else {
85a80aa9 345 result = (UInt_t) AliCDBManager::Instance()->GetStorage(mainUri)
84090f85 346 ->Put(object, id, metaData);
347 }
348
349 if(!result) {
350
351 Log(fCurrentDetector,
2bb7b766 352 Form("WriteToCDB - Problem with main %s storage. Putting <%s> into backup storage",
353 cdbType, path.GetPath().Data()));
354
355 // Set Grid version to current run number, to ease retrieval later
356 id.SetVersion(GetCurrentRun());
84090f85 357
85a80aa9 358 result = AliCDBManager::Instance()->GetStorage(localUri)
84090f85 359 ->Put(object, id, metaData);
360
361 if(result) {
362 result = 2;
85a80aa9 363 fGridError = kTRUE;
84090f85 364 }else{
2bb7b766 365 Log(fCurrentDetector, "WriteToCDB - Can't store data!");
b948db8d 366 }
367 }
2bb7b766 368
b948db8d 369 return result;
370
73abe331 371}
372
b948db8d 373//______________________________________________________________________________________________
5164a766 374AliShuttleStatus* AliShuttle::ReadShuttleStatus()
375{
2bb7b766 376// Reads the AliShuttleStatus from the CDB
5164a766 377
2bb7b766 378 if (fStatusEntry){
379 delete fStatusEntry;
380 fStatusEntry = 0;
381 }
5164a766 382
10a5a932 383 fStatusEntry = AliCDBManager::Instance()->GetStorage(GetLocalCDB())
2bb7b766 384 ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
5164a766 385
2bb7b766 386 if (!fStatusEntry) return 0;
387 fStatusEntry->SetOwner(1);
5164a766 388
2bb7b766 389 AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
390 if (!status) {
391 AliError("Invalid object stored to CDB!");
392 return 0;
393 }
5164a766 394
2bb7b766 395 return status;
5164a766 396}
397
398//______________________________________________________________________________________________
7bfb2090 399Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status)
5164a766 400{
2bb7b766 401// writes the status for one subdetector
402
403 if (fStatusEntry){
404 delete fStatusEntry;
405 fStatusEntry = 0;
406 }
5164a766 407
2bb7b766 408 Int_t run = GetCurrentRun();
5164a766 409
2bb7b766 410 AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
5164a766 411
2bb7b766 412 fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
413 fStatusEntry->SetOwner(1);
5164a766 414
2bb7b766 415 UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
7bfb2090 416
2bb7b766 417 if (!result) {
418 AliError(Form("WriteShuttleStatus for %s, run %d failed", fCurrentDetector.Data(), run));
419 return kFALSE;
420 }
e7f62f16 421
422 SendMLInfo();
7bfb2090 423
2bb7b766 424 return kTRUE;
5164a766 425}
426
427//______________________________________________________________________________________________
428void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount)
429{
430 // changes the AliShuttleStatus for the given detector and run to the given status
431
2bb7b766 432 if (!fStatusEntry){
433 AliError("UNEXPECTED: fStatusEntry empty");
434 return;
435 }
5164a766 436
2bb7b766 437 AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
5164a766 438
2bb7b766 439 if (!status){
440 AliError("UNEXPECTED: status could not be read from current CDB entry");
441 return;
442 }
5164a766 443
2c15234c 444 TString actionStr = Form("UpdateShuttleStatus - %s: Changing state from %s to %s",
eba76848 445 fCurrentDetector.Data(),
36c99a6a 446 status->GetStatusName(),
eba76848 447 status->GetStatusName(newStatus));
cb343cfd 448 Log("SHUTTLE", actionStr);
449 SetLastAction(actionStr);
5164a766 450
2bb7b766 451 status->SetStatus(newStatus);
452 if (increaseCount) status->IncreaseCount();
5164a766 453
2bb7b766 454 AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
e7f62f16 455
456 SendMLInfo();
5164a766 457}
e7f62f16 458
459//______________________________________________________________________________________________
460void AliShuttle::SendMLInfo()
461{
462 //
463 // sends ML information about the current status of the current detector being processed
464 //
465
466 AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
467
468 if (!status){
469 AliError("UNEXPECTED: status could not be read from current CDB entry");
470 return;
471 }
472
473 TMonaLisaText mlStatus(Form("%s_status", fCurrentDetector.Data()), status->GetStatusName());
474 TMonaLisaValue mlRetryCount(Form("%s_count", fCurrentDetector.Data()), status->GetCount());
475
476 TList mlList;
477 mlList.Add(&mlStatus);
478 mlList.Add(&mlRetryCount);
479
480 fMonaLisa->SendParameters(&mlList);
481}
482
5164a766 483//______________________________________________________________________________________________
484Bool_t AliShuttle::ContinueProcessing()
485{
2bb7b766 486// this function reads the AliShuttleStatus information from CDB and
487// checks if the processing should be continued
488// if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
489
57c1a579 490 if (!fConfig->HostProcessDetector(fCurrentDetector)) return kFALSE;
491
492 AliPreprocessor* aPreprocessor =
493 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
494 if (!aPreprocessor)
495 {
496 AliInfo(Form("%s: no preprocessor registered", fCurrentDetector.Data()));
497 return kFALSE;
498 }
499
2bb7b766 500 AliShuttleLogbookEntry::Status entryStatus =
eba76848 501 fLogbookEntry->GetDetectorStatus(fCurrentDetector);
2bb7b766 502
503 if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
9e080f92 504 AliInfo(Form("ContinueProcessing - %s is %s",
2bb7b766 505 fCurrentDetector.Data(),
506 fLogbookEntry->GetDetectorStatusName(entryStatus)));
507 return kFALSE;
508 }
509
510 // if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state
be48e3ea 511
512 // check if current run is first unprocessed run for current detector
513 if (fConfig->StrictRunOrder(fCurrentDetector) &&
514 !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
515 {
516 Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering but this is not the first unprocessed run!"));
517 return kFALSE;
518 }
519
2bb7b766 520 AliShuttleStatus* status = ReadShuttleStatus();
521 if (!status) {
522 // first time
523 Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
524 fCurrentDetector.Data()));
525 status = new AliShuttleStatus(AliShuttleStatus::kStarted);
526 return WriteShuttleStatus(status);
527 }
528
529 // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
530 // If it happens it may mean Logbook updating failed... let's do it now!
531 if (status->GetStatus() == AliShuttleStatus::kDone ||
532 status->GetStatus() == AliShuttleStatus::kFailed){
533 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
534 fCurrentDetector.Data(),
535 status->GetStatusName(status->GetStatus())));
536 UpdateShuttleLogbook(fCurrentDetector.Data(),
537 status->GetStatusName(status->GetStatus()));
538 return kFALSE;
539 }
540
541 if (status->GetStatus() == AliShuttleStatus::kStoreFailed) {
542 Log("SHUTTLE",
543 Form("ContinueProcessing - %s: Grid storage of one or more objects failed. Trying again now",
544 fCurrentDetector.Data()));
545 if(TryToStoreAgain()){
546 Log(fCurrentDetector.Data(), "ContinueProcessing - All objects successfully stored into OCDB");
547 UpdateShuttleStatus(AliShuttleStatus::kDone);
548 UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE");
549 } else {
550 Log("SHUTTLE",
551 Form("ContinueProcessing - %s: Grid storage failed again",
552 fCurrentDetector.Data()));
e7f62f16 553 // trigger ML information manually because we do not had a status change
554 SendMLInfo();
2bb7b766 555 }
556 return kFALSE;
557 }
558
559 // if we get here, there is a restart
57c1a579 560 Bool_t cont = kFALSE;
2bb7b766 561
562 // abort conditions
cb343cfd 563 if (status->GetCount() >= fConfig->GetMaxRetries()) {
57c1a579 564 Log("SHUTTLE", Form("ContinueProcessing - %s failed %d times in status %s - "
565 "Updating Shuttle Logbook", fCurrentDetector.Data(),
2bb7b766 566 status->GetCount(), status->GetStatusName()));
567 UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
e7f62f16 568 UpdateShuttleStatus(AliShuttleStatus::kFailed);
57c1a579 569 } else {
570 Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. "
571 "Aborted before with %s. Retry number %d.", fCurrentDetector.Data(),
572 status->GetStatusName(), status->GetCount()));
573 UpdateShuttleStatus(AliShuttleStatus::kStarted, kTRUE);
574 cont = kTRUE;
2bb7b766 575 }
576
57c1a579 577 // Send mail to detector expert!
578 AliInfo(Form("Sending mail to %s expert...", fCurrentDetector.Data()));
579 if (!SendMail())
580 Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert",
581 fCurrentDetector.Data()));
2bb7b766 582
57c1a579 583 return cont;
5164a766 584}
585
586//______________________________________________________________________________________________
2bb7b766 587Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
58bc3020 588{
73abe331 589 //
b948db8d 590 // Makes data retrieval for all detectors in the configuration.
2bb7b766 591 // entry: Shuttle logbook entry, contains run paramenters and status of detectors
592 // (Unprocessed, Inactive, Failed or Done).
d477ad88 593 // Returns kFALSE in case of error occured and kTRUE otherwise
73abe331 594 //
595
2bb7b766 596 if(!entry) return kFALSE;
597
598 fLogbookEntry = entry;
599
e7f62f16 600 if (fLogbookEntry->IsDone())
601 {
2bb7b766 602 Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook");
603 UpdateShuttleLogbook("shuttle_done");
604 fLogbookEntry = 0;
605 return kTRUE;
606 }
607
e7f62f16 608 // create ML instance that monitors this run
609 fMonaLisa = new TMonaLisaWriter(Form("%d", GetCurrentRun()), "SHUTTLE", "aliendb1.cern.ch");
610 // disable monitoring of other parameters that come e.g. from TFile
611 gMonitoringWriter = 0;
2bb7b766 612
613 AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^* \n",
614 GetCurrentRun()));
615
e7f62f16 616
617 // Send the information to ML
618 TMonaLisaText mlStatus("SHUTTLE_status", "Processing");
619
620 TList mlList;
621 mlList.Add(&mlStatus);
622
623 fMonaLisa->SendParameters(&mlList);
624
eba76848 625 fLogbookEntry->Print("all");
57f50b3c 626
627 // Initialization
d477ad88 628 Bool_t hasError = kFALSE;
5164a766 629
2bb7b766 630 AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
631 if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
632 AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
633 if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
d477ad88 634
57f50b3c 635 // Loop on detectors in the configuration
b948db8d 636 TIter iter(fConfig->GetDetectors());
2bb7b766 637 TObjString* aDetector = 0;
b948db8d 638
be48e3ea 639 while ((aDetector = (TObjString*) iter.Next()))
640 {
7bfb2090 641 fCurrentDetector = aDetector->String();
5164a766 642
9e080f92 643 if (ContinueProcessing() == kFALSE) continue;
644
2bb7b766 645 AliInfo(Form("\n\n \t\t\t****** run %d - %s: START ******",
646 GetCurrentRun(), aDetector->GetName()));
647
9d733021 648 for(Int_t iSys=0;iSys<3;iSys++) fFXSCalled[iSys]=kFALSE;
649
e7f62f16 650 Log(fCurrentDetector.Data(), "Starting processing");
85a80aa9 651
be48e3ea 652 Int_t pid = fork();
653
654 if (pid < 0)
655 {
656 Log("SHUTTLE", "ERROR: Forking failed");
657 }
658 else if (pid > 0)
659 {
660 // parent
661 AliInfo(Form("In parent process of %d - %s: Starting monitoring",
662 GetCurrentRun(), aDetector->GetName()));
663
664 Long_t begin = time(0);
665
666 int status; // to be used with waitpid, on purpose an int (not Int_t)!
667 while (waitpid(pid, &status, WNOHANG) == 0)
668 {
669 Long_t expiredTime = time(0) - begin;
670
671 if (expiredTime > fConfig->GetPPTimeOut())
672 {
673 Log("SHUTTLE", Form("Process time out. Run time: %d seconds. Killing...",
674 expiredTime));
675
676 kill(pid, 9);
677
678 hasError = kTRUE;
679
680 gSystem->Sleep(1000);
681 }
682 else
683 {
684 if (expiredTime % 60 == 0)
685 Log("SHUTTLE", Form("Checked process. Run time: %d seconds.",
686 expiredTime));
687 gSystem->Sleep(1000);
688 }
689 }
690
691 AliInfo(Form("In parent process of %d - %s: Client has terminated.",
692 GetCurrentRun(), aDetector->GetName()));
693
694 if (WIFEXITED(status))
695 {
696 Int_t returnCode = WEXITSTATUS(status);
697
698 Log("SHUTTLE", Form("The return code is %d", returnCode));
699
700 if (returnCode != 0)
701 hasError = kTRUE;
702 }
703 }
704 else if (pid == 0)
705 {
706 // client
707 AliInfo(Form("In client process of %d - %s", GetCurrentRun(), aDetector->GetName()));
708
709 UInt_t result = ProcessCurrentDetector();
710
711 Int_t returnCode = 0; // will be set to 1 in case of an error
712
713 if (!result)
714 {
715 returnCode = 1;
716 AliInfo(Form("\n \t\t\t****** run %d - %s: PREPROCESSOR ERROR ****** \n\n",
717 GetCurrentRun(), aDetector->GetName()));
718 }
719 else if (result == 2)
720 {
721 AliInfo(Form("\n \t\t\t****** run %d - %s: STORAGE ERROR ****** \n\n",
722 GetCurrentRun(), aDetector->GetName()));
723 } else
724 {
725 AliInfo(Form("\n \t\t\t****** run %d - %s: DONE ****** \n\n",
726 GetCurrentRun(), aDetector->GetName()));
727 }
728
729 if (result > 0)
730 {
2c15234c 731 // Process successful: Update time_processed field in FXS logbooks!
9d733021 732 if (UpdateTable() == kFALSE) returnCode = 1;
be48e3ea 733 }
734
4b95672b 735 for (UInt_t iSys=0; iSys<3; iSys++)
736 {
737 if (fFXSCalled[iSys]) fFXSlist[iSys].Clear();
738 }
739
be48e3ea 740 AliInfo(Form("Client process of %d - %s is exiting now with %d.",
741 GetCurrentRun(), aDetector->GetName(), returnCode));
742
743 // the client exits here
744 gSystem->Exit(returnCode);
745
746 AliError("We should never get here!!!");
747 }
7bfb2090 748 }
5164a766 749
2bb7b766 750 AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^* \n",
751 GetCurrentRun()));
752
753 //check if shuttle is done for this run, if so update logbook
754 TObjArray checkEntryArray;
755 checkEntryArray.SetOwner(1);
9e080f92 756 TString whereClause = Form("where run=%d", GetCurrentRun());
757 if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || checkEntryArray.GetEntries() == 0) {
758 Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!",
759 GetCurrentRun()));
760 return hasError == kFALSE;
761 }
b948db8d 762
9e080f92 763 AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*>
764 (checkEntryArray.At(0));
2bb7b766 765
9e080f92 766 if (checkEntry)
767 {
768 if (checkEntry->IsDone())
be48e3ea 769 {
9e080f92 770 Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook");
771 UpdateShuttleLogbook("shuttle_done");
772 }
773 else
774 {
775 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
be48e3ea 776 {
9e080f92 777 if (checkEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
be48e3ea 778 {
9e080f92 779 AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
780 checkEntry->GetRun(), GetDetName(iDet)));
781 fFirstUnprocessed[iDet] = kFALSE;
be48e3ea 782 }
783 }
2bb7b766 784 }
785 }
786
e7f62f16 787 // remove ML instance
788 delete fMonaLisa;
789 fMonaLisa = 0;
790
2bb7b766 791 fLogbookEntry = 0;
85a80aa9 792
a7160fe9 793 return hasError == kFALSE;
73abe331 794}
795
b948db8d 796//______________________________________________________________________________________________
2bb7b766 797UInt_t AliShuttle::ProcessCurrentDetector()
73abe331 798{
799 //
2bb7b766 800 // Makes data retrieval just for a specific detector (fCurrentDetector).
73abe331 801 // Threre should be a configuration for this detector.
73abe331 802
2bb7b766 803 AliInfo(Form("Retrieving values for %s, run %d", fCurrentDetector.Data(), GetCurrentRun()));
73abe331 804
7bfb2090 805 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
73abe331 806
2c15234c 807 TMap dcsMap;
808 dcsMap.SetOwner(1);
73abe331 809
85a80aa9 810 Bool_t aDCSError = kFALSE;
811 fGridError = kFALSE;
d477ad88 812
2c15234c 813 // TODO Test only... I've added a flag that allows to
814 // exclude DCS archive DB query
815 if (!fgkProcessDCS)
816 {
817 AliInfo("Skipping DCS processing!");
818 aDCSError = kFALSE;
819 } else {
820 TString host(fConfig->GetDCSHost(fCurrentDetector));
821 Int_t port = fConfig->GetDCSPort(fCurrentDetector);
822
823 // Retrieval of Aliases
824 TObjString* anAlias = 0;
36c99a6a 825 Int_t iAlias = 1;
826 Int_t nTotAliases= ((TMap*)fConfig->GetDCSAliases(fCurrentDetector))->GetEntries();
2c15234c 827 TIter iterAliases(fConfig->GetDCSAliases(fCurrentDetector));
828 while ((anAlias = (TObjString*) iterAliases.Next()))
829 {
830 TObjArray *valueSet = new TObjArray();
831 valueSet->SetOwner(1);
832
36c99a6a 833 if (((iAlias-1) % 500) == 0 || iAlias == nTotAliases)
834 AliInfo(Form("Querying DCS archive: alias %s (%d of %d)",
835 anAlias->GetName(), iAlias++, nTotAliases));
2c15234c 836 aDCSError = (GetValueSet(host, port, anAlias->String(), valueSet, kAlias) == 0);
837
838 if(!aDCSError)
839 {
840 dcsMap.Add(anAlias->Clone(), valueSet);
841 } else {
842 Log(fCurrentDetector,
843 Form("ProcessCurrentDetector - Error while retrieving alias %s",
844 anAlias->GetName()));
845 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
846 dcsMap.DeleteAll();
847 return 0;
848 }
4f0ab988 849 }
2c15234c 850
851 // Retrieval of Data Points
852 TObjString* aDP = 0;
36c99a6a 853 Int_t iDP = 0;
854 Int_t nTotDPs= ((TMap*)fConfig->GetDCSDataPoints(fCurrentDetector))->GetEntries();
2c15234c 855 TIter iterDP(fConfig->GetDCSDataPoints(fCurrentDetector));
856 while ((aDP = (TObjString*) iterDP.Next()))
857 {
858 TObjArray *valueSet = new TObjArray();
859 valueSet->SetOwner(1);
36c99a6a 860 if (((iDP-1) % 500) == 0 || iDP == nTotDPs)
861 AliInfo(Form("Querying DCS archive: DP %s (%d of %d)",
862 aDP->GetName(), iDP++, nTotDPs));
2c15234c 863 aDCSError = (GetValueSet(host, port, aDP->String(), valueSet, kDP) == 0);
864
865 if(!aDCSError)
866 {
867 dcsMap.Add(aDP->Clone(), valueSet);
868 } else {
869 Log(fCurrentDetector,
870 Form("ProcessCurrentDetector - Error while retrieving data point %s",
871 aDP->GetName()));
872 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
873 dcsMap.DeleteAll();
874 return 0;
875 }
73abe331 876 }
877 }
b948db8d 878
2bb7b766 879 // DCS Archive DB processing successful. Call Preprocessor!
85a80aa9 880 UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
a7160fe9 881
85a80aa9 882 AliPreprocessor* aPreprocessor =
5164a766 883 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
b948db8d 884
2bb7b766 885 aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
2c15234c 886 UInt_t aPPResult = aPreprocessor->Process(&dcsMap);
2bb7b766 887
888 UInt_t returnValue = 0;
85a80aa9 889 if (aPPResult == 0) { // Preprocessor error
cb343cfd 890 UpdateShuttleStatus(AliShuttleStatus::kPPError);
2bb7b766 891 returnValue = 0;
85a80aa9 892 } else if (fGridError == kFALSE) { // process and Grid storage ok!
893 UpdateShuttleStatus(AliShuttleStatus::kDone);
2bb7b766 894 UpdateShuttleLogbook(fCurrentDetector, "DONE");
895 Log(fCurrentDetector.Data(),
896 "ProcessCurrentDetector - Preprocessor and Grid storage ended successfully");
897 returnValue = 1;
85a80aa9 898 } else { // Grid storage error (process ok, but object put in local storage)
899 UpdateShuttleStatus(AliShuttleStatus::kStoreFailed);
2bb7b766 900 returnValue = 2;
85a80aa9 901 }
b948db8d 902
2c15234c 903 dcsMap.DeleteAll();
b948db8d 904
2bb7b766 905 return returnValue;
906}
907
908//______________________________________________________________________________________________
909Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
910 TObjArray& entries)
911{
912// Query DAQ's Shuttle logbook and fills detector status object.
913// Call QueryRunParameters to query DAQ logbook for run parameters.
914
fc5a4708 915 entries.SetOwner(1);
916
2bb7b766 917 // check connection, in case connect
be48e3ea 918 if(!Connect(3)) return kFALSE;
2bb7b766 919
920 TString sqlQuery;
921 sqlQuery = Form("select * from logbook_shuttle %s order by run", whereClause);
922
be48e3ea 923 TSQLResult* aResult = fServer[3]->Query(sqlQuery);
2bb7b766 924 if (!aResult) {
925 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
926 return kFALSE;
927 }
928
fc5a4708 929 AliDebug(2,Form("Query = %s", sqlQuery.Data()));
930
2bb7b766 931 if(aResult->GetRowCount() == 0) {
9e080f92 932// if(sqlQuery.EndsWith("where shuttle_done=0 order by run")){
933// Log("SHUTTLE", "QueryShuttleLogbook - All runs in Shuttle Logbook are already DONE");
934// delete aResult;
935// return kTRUE;
936// } else {
937 AliInfo("No entries in Shuttle Logbook match request");
2bb7b766 938 delete aResult;
939 return kTRUE;
9e080f92 940// }
2bb7b766 941 }
942
943 // TODO Check field count!
fc5a4708 944 const UInt_t nCols = 22;
2bb7b766 945 if (aResult->GetFieldCount() != (Int_t) nCols) {
946 AliError("Invalid SQL result field number!");
947 delete aResult;
948 return kFALSE;
949 }
950
2bb7b766 951 TSQLRow* aRow;
952 while ((aRow = aResult->Next())) {
953 TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
954 Int_t run = runString.Atoi();
955
eba76848 956 AliShuttleLogbookEntry *entry = QueryRunParameters(run);
957 if (!entry)
958 continue;
2bb7b766 959
960 // loop on detectors
eba76848 961 for(UInt_t ii = 0; ii < nCols; ii++)
962 entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii));
2bb7b766 963
eba76848 964 entries.AddLast(entry);
2bb7b766 965 delete aRow;
966 }
967
9e080f92 968// if(sqlQuery.EndsWith("where shuttle_done=0 order by run"))
969// Log("SHUTTLE", Form("QueryShuttleLogbook - Found %d unprocessed runs in Shuttle Logbook",
970// entries.GetEntriesFast()));
2bb7b766 971 delete aResult;
972 return kTRUE;
973}
974
975//______________________________________________________________________________________________
eba76848 976AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
2bb7b766 977{
eba76848 978 //
979 // Retrieve run parameters written in the DAQ logbook and sets them into AliShuttleLogbookEntry object
980 //
2bb7b766 981
982 // check connection, in case connect
be48e3ea 983 if (!Connect(3))
eba76848 984 return 0;
2bb7b766 985
986 TString sqlQuery;
2c15234c 987 sqlQuery.Form("select * from %s where run=%d", fConfig->GetDAQlbTable(), run);
2bb7b766 988
be48e3ea 989 TSQLResult* aResult = fServer[3]->Query(sqlQuery);
2bb7b766 990 if (!aResult) {
991 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
eba76848 992 return 0;
2bb7b766 993 }
994
eba76848 995 if (aResult->GetRowCount() == 0) {
2bb7b766 996 Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run));
997 delete aResult;
eba76848 998 return 0;
2bb7b766 999 }
1000
eba76848 1001 if (aResult->GetRowCount() > 1) {
2bb7b766 1002 AliError(Form("More than one entry in DAQ Logbook for run %d. Skipping", run));
1003 delete aResult;
eba76848 1004 return 0;
2bb7b766 1005 }
1006
eba76848 1007 TSQLRow* aRow = aResult->Next();
1008 if (!aRow)
1009 {
1010 AliError(Form("Could not retrieve row for run %d. Skipping", run));
1011 delete aResult;
1012 return 0;
1013 }
2bb7b766 1014
eba76848 1015 AliShuttleLogbookEntry* entry = new AliShuttleLogbookEntry(run);
2bb7b766 1016
eba76848 1017 for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
1018 entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
2bb7b766 1019
eba76848 1020 UInt_t startTime = entry->GetStartTime();
1021 UInt_t endTime = entry->GetEndTime();
1022
1023 if (!startTime || !endTime || startTime > endTime) {
1024 Log("SHUTTLE",
1025 Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d",
1026 run, startTime, endTime));
1027 delete entry;
2bb7b766 1028 delete aRow;
eba76848 1029 delete aResult;
1030 return 0;
2bb7b766 1031 }
1032
eba76848 1033 delete aRow;
2bb7b766 1034 delete aResult;
eba76848 1035
1036 return entry;
2bb7b766 1037}
1038
1039//______________________________________________________________________________________________
1040Bool_t AliShuttle::TryToStoreAgain()
1041{
1042 // Called in case the detector failed to store the object in Grid OCDB
1043 // It tries to store the object again, if it does not find more recent and overlapping objects
1044 // Calls underlying TryToStoreAgain(const char*) function twice, for OCDB and Reference storage.
1045
1046 AliInfo("Trying to store OCDB data again...");
1047 Bool_t resultCDB = TryToStoreAgain(fgkMainCDB);
1048
1049 AliInfo("Trying to store reference data again...");
1050 Bool_t resultRef = TryToStoreAgain(fgkMainRefStorage);
1051
1052 return resultCDB && resultRef;
1053}
1054
1055//______________________________________________________________________________________________
1056Bool_t AliShuttle::TryToStoreAgain(TString& gridURI)
1057{
1058 // Called by TryToStoreAgain(), performs actual storage retry
1059
6ec0e06c 1060 TObjArray* gridIds=0;
2bb7b766 1061
1062 Bool_t result = kTRUE;
1063
1064 const char* type = 0;
1065 TString backupURI;
1066 if(gridURI == fgkMainCDB) {
1067 type = "OCDB";
1068 backupURI = fgkLocalCDB;
1069 } else if(gridURI == fgkMainRefStorage) {
1070 type = "reference";
1071 backupURI = fgkLocalRefStorage;
1072 } else {
1073 AliError(Form("Invalid storage URI: %s", gridURI.Data()));
1074 return kFALSE;
1075 }
1076
1077 AliCDBManager* man = AliCDBManager::Instance();
1078
1079 AliCDBStorage *gridSto = man->GetStorage(gridURI);
1080 if(!gridSto) {
1081 Log(fCurrentDetector.Data(),
1082 Form("TryToStoreAgain - cannot activate main %s storage", type));
1083 return kFALSE;
1084 }
1085
1086 gridIds = gridSto->GetQueryCDBList();
1087
1088 // get objects previously stored in local CDB
1089 AliCDBStorage *backupSto = man->GetStorage(backupURI);
eba76848 1090 AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*");
2bb7b766 1091 // Local objects were stored with current run as Grid version!
1092 TList* localEntries = backupSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun());
1093 localEntries->SetOwner(1);
1094
1095 // loop on local stored objects
1096 TIter localIter(localEntries);
1097 AliCDBEntry *aLocEntry = 0;
1098 while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){
1099 aLocEntry->SetOwner(1);
1100 AliCDBId aLocId = aLocEntry->GetId();
1101 aLocEntry->SetVersion(-1);
1102 aLocEntry->SetSubVersion(-1);
1103
1104 // loop on Grid valid Id's
1105 Bool_t store = kTRUE;
1106 TIter gridIter(gridIds);
1107 AliCDBId* aGridId = 0;
1108 while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
be48e3ea 1109 // If local object is valid up to infinity we store it only if it is
1110 // the first unprocessed run!
1111 if (aLocId.GetLastRun() == AliCDBRunRange::Infinity())
1112 {
1113 if (!fFirstUnprocessed[GetDetPos(fCurrentDetector)])
1114 {
1115 Log(fCurrentDetector.Data(),
2c15234c 1116 ("TryToStoreAgain - This object has validity infinite but "
1117 "there are previous unprocessed runs!"));
be48e3ea 1118 continue;
1119 } else {
1120 break;
1121 }
2bb7b766 1122 }
1123 if(aGridId->GetPath() != aLocId.GetPath()) continue;
1124 // skip all objects valid up to infinity
1125 if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
1126 // if we get here, it means there's already some more recent object stored on Grid!
1127 store = kFALSE;
1128 break;
1129 }
1130
1131 if(!store){
1132 Log(fCurrentDetector.Data(),
1133 Form("TryToStoreAgain - A more recent object already exists in %s storage: <%s>",
1134 type, aGridId->ToString().Data()));
1135 // removing local filename...
1136 // TODO maybe it's better not to remove it, it was not copied to the Grid!
1137 TString filename;
1138 backupSto->IdToFilename(aLocId, filename);
1139 AliInfo(Form("Removing local file %s", filename.Data()));
1140 gSystem->Exec(Form("rm %s",filename.Data()));
1141 continue;
1142 }
1143
1144 // If we get here, the file can be stored!
1145 Bool_t storeOk = gridSto->Put(aLocEntry);
1146 if(storeOk){
1147 Log(fCurrentDetector.Data(),
1148 Form("TryToStoreAgain - Object <%s> successfully put into %s storage",
1149 aLocId.ToString().Data(), type));
1150
1151 // removing local filename...
1152 TString filename;
1153 backupSto->IdToFilename(aLocId, filename);
1154 AliInfo(Form("Removing local file %s", filename.Data()));
1155 gSystem->Exec(Form("rm %s", filename.Data()));
1156 continue;
1157 } else {
1158 Log(fCurrentDetector.Data(),
1159 Form("TryToStoreAgain - Grid %s storage of object <%s> failed again",
1160 type, aLocId.ToString().Data()));
1161 result = kFALSE;
1162 }
1163 }
1164 localEntries->Clear();
1165
1166 return result;
73abe331 1167}
1168
b948db8d 1169//______________________________________________________________________________________________
2c15234c 1170Bool_t AliShuttle::GetValueSet(const char* host, Int_t port, const char* entry,
1171 TObjArray* valueSet, DCSType type)
73abe331 1172{
2c15234c 1173// Retrieve all "entry" data points from the DCS server
58bc3020 1174// host, port: TSocket connection parameters
2c15234c 1175// entry: name of the alias or data point
2bb7b766 1176// valueSet: array of retrieved AliDCSValue's
2c15234c 1177// type: kAlias or kDP
58bc3020 1178
73abe331 1179 AliDCSClient client(host, port, fTimeout, fRetries);
2c15234c 1180 if (!client.IsConnected())
1181 {
b948db8d 1182 return kFALSE;
73abe331 1183 }
1184
2c15234c 1185 Int_t result=0;
73abe331 1186
2c15234c 1187 if (type == kAlias)
1188 {
1189 result = client.GetAliasValues(entry,
1190 GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1191 } else
1192 if (type == kDP)
1193 {
1194 result = client.GetDPValues(entry,
1195 GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1196 }
1197
1198 if (result < 0)
1199 {
2bb7b766 1200 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get '%s'! Reason: %s",
2c15234c 1201 entry, AliDCSClient::GetErrorString(result)));
73abe331 1202
2c15234c 1203 if (result == AliDCSClient::fgkServerError)
1204 {
2bb7b766 1205 Log(fCurrentDetector.Data(), Form("GetValueSet - Server error: %s",
73abe331 1206 client.GetServerError().Data()));
1207 }
1208
1209 return kFALSE;
1210 }
1211
1212 return kTRUE;
1213}
b948db8d 1214
1215//______________________________________________________________________________________________
57f50b3c 1216const char* AliShuttle::GetFile(Int_t system, const char* detector,
1217 const char* id, const char* source)
b948db8d 1218{
57f50b3c 1219// Get calibration file from file exchange servers
9d733021 1220// First queris the FXS database for the file name, using the run, detector, id and source info
1221// then calls RetrieveFile(filename) for actual copy to local disk
2bb7b766 1222// run: current run being processed (given by Logbook entry fLogbookEntry)
eba76848 1223// detector: the Preprocessor name
57f50b3c 1224// id: provided as a parameter by the Preprocessor
1225// source: provided by the Preprocessor through GetFileSources function
1226
1227 // check connection, in case connect
9d733021 1228 if (!Connect(system))
eba76848 1229 {
9d733021 1230 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
57f50b3c 1231 return 0;
1232 }
1233
1234 // Query preparation
9d733021 1235 TString sourceName(source);
d386d623 1236 Int_t nFields = 3;
1237 TString sqlQueryStart = Form("select filePath,size,fileChecksum from %s where",
1238 fConfig->GetFXSdbTable(system));
1239 TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1240 GetCurrentRun(), detector, id);
1241
9d733021 1242 if (system == kDAQ)
1243 {
d386d623 1244 whereClause += Form(" and DAQsource=\"%s\"", source);
57f50b3c 1245 }
9d733021 1246 else if (system == kDCS)
eba76848 1247 {
9d733021 1248 sourceName="none";
57f50b3c 1249 }
9d733021 1250 else if (system == kHLT)
9e080f92 1251 {
d386d623 1252 whereClause += Form(" and DDLnumbers=\"%s\"", source);
9d733021 1253 nFields = 3;
9e080f92 1254 }
1255
9e080f92 1256 TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1257
1258 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1259
1260 // Query execution
1261 TSQLResult* aResult = 0;
9d733021 1262 aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
9e080f92 1263 if (!aResult) {
9d733021 1264 Log(detector, Form("GetFileName - Can't execute SQL query to %s database for: id = %s, source = %s",
1265 GetSystemName(system), id, sourceName.Data()));
9e080f92 1266 return 0;
1267 }
1268
1269 if(aResult->GetRowCount() == 0)
1270 {
1271 Log(detector,
9d733021 1272 Form("GetFileName - No entry in %s FXS db for: id = %s, source = %s",
1273 GetSystemName(system), id, sourceName.Data()));
9e080f92 1274 delete aResult;
1275 return 0;
1276 }
2bb7b766 1277
9e080f92 1278 if (aResult->GetRowCount() > 1) {
1279 Log(detector,
9d733021 1280 Form("GetFileName - More than one entry in %s FXS db for: id = %s, source = %s",
1281 GetSystemName(system), id, sourceName.Data()));
9e080f92 1282 delete aResult;
1283 return 0;
1284 }
1285
9d733021 1286 if (aResult->GetFieldCount() != nFields) {
9e080f92 1287 Log(detector,
9d733021 1288 Form("GetFileName - Wrong field count in %s FXS db for: id = %s, source = %s",
1289 GetSystemName(system), id, sourceName.Data()));
9e080f92 1290 delete aResult;
1291 return 0;
1292 }
1293
1294 TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
1295
1296 if (!aRow){
9d733021 1297 Log(detector, Form("GetFileName - Empty set result in %s FXS db from query: id = %s, source = %s",
1298 GetSystemName(system), id, sourceName.Data()));
9e080f92 1299 delete aResult;
1300 return 0;
1301 }
1302
1303 TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
1304 TString fileSize(aRow->GetField(1), aRow->GetFieldLength(1));
d386d623 1305 TString fileChecksum(aRow->GetField(2), aRow->GetFieldLength(2));
9e080f92 1306
1307 delete aResult;
1308 delete aRow;
1309
d386d623 1310 AliDebug(2, Form("filePath = %s; size = %s, fileChecksum = %s",
1311 filePath.Data(), fileSize.Data(), fileChecksum.Data()));
9e080f92 1312
9e080f92 1313 // retrieved file is renamed to make it unique
9d733021 1314 TString localFileName = Form("%s_%s_%d_%s_%s.shuttle",
1315 GetSystemName(system), detector, GetCurrentRun(), id, sourceName.Data());
1316
9e080f92 1317
9d733021 1318 // file retrieval from FXS
4b95672b 1319 UInt_t nRetries = 0;
1320 UInt_t maxRetries = 3;
1321 Bool_t result = kFALSE;
1322
1323 // copy!! if successful TSystem::Exec returns 0
1324 while(nRetries++ < maxRetries) {
1325 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
1326 result = RetrieveFile(system, filePath.Data(), localFileName.Data());
1327 if(!result)
1328 {
1329 Log(detector, Form("GetFileName - Copy of file %s from %s FXS failed",
9d733021 1330 filePath.Data(), GetSystemName(system)));
4b95672b 1331 continue;
1332 } else {
1333 AliInfo(Form("File %s copied from %s FXS into %s/%s",
1334 filePath.Data(), GetSystemName(system),
1335 GetShuttleTempDir(), localFileName.Data()));
1336 }
9e080f92 1337
d386d623 1338 if (fileChecksum.Length()>0)
4b95672b 1339 {
1340 // compare md5sum of local file with the one stored in the FXS DB
1341 Int_t md5Comp = gSystem->Exec(Form("md5sum %s/%s |grep %s 2>&1 > /dev/null",
d386d623 1342 GetShuttleTempDir(), localFileName.Data(), fileChecksum.Data()));
9e080f92 1343
4b95672b 1344 if (md5Comp != 0)
1345 {
1346 Log(detector, Form("GetFileName - md5sum of file %s does not match with local copy!",
1347 filePath.Data()));
1348 result = kFALSE;
1349 continue;
1350 }
d386d623 1351 } else {
1352 Log(fCurrentDetector, Form("GetFile - md5sum of file %s not set in %s database, skipping comparison",
1353 filePath.Data(), GetSystemName(system)));
9d733021 1354 }
4b95672b 1355 if (result) break;
9e080f92 1356 }
1357
4b95672b 1358 if(!result) return 0;
1359
9d733021 1360 fFXSCalled[system]=kTRUE;
1361 TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, sourceName.Data()));
1362 fFXSlist[system].Add(fileParams);
9e080f92 1363
1364 static TString fullLocalFileName;
36c99a6a 1365 fullLocalFileName = TString::Format("%s/%s", GetShuttleTempDir(), localFileName.Data());
1366
9e080f92 1367 AliInfo(Form("fullLocalFileName = %s", fullLocalFileName.Data()));
1368
1369 return fullLocalFileName.Data();
2bb7b766 1370
1371}
1372
1373//______________________________________________________________________________________________
9d733021 1374Bool_t AliShuttle::RetrieveFile(UInt_t system, const char* fxsFileName, const char* localFileName)
9e080f92 1375{
9d733021 1376// Copies file from FXS to local Shuttle machine
2bb7b766 1377
9e080f92 1378 // check temp directory: trying to cd to temp; if it does not exist, create it
9d733021 1379 AliDebug(2, Form("Copy file %s from %s FXS into %s/%s",
1380 GetSystemName(system), fxsFileName, GetShuttleTempDir(), localFileName));
9e080f92 1381
36c99a6a 1382 void* dir = gSystem->OpenDirectory(GetShuttleTempDir());
9e080f92 1383 if (dir == NULL) {
36c99a6a 1384 if (gSystem->mkdir(GetShuttleTempDir(), kTRUE)) {
1385 AliError(Form("Can't open directory <%s>", GetShuttleTempDir()));
9e080f92 1386 return kFALSE;
1387 }
1388
1389 } else {
1390 gSystem->FreeDirectory(dir);
1391 }
1392
9d733021 1393 TString baseFXSFolder;
1394 if (system == kDAQ)
1395 {
1396 baseFXSFolder = "FES/";
1397 }
1398 else if (system == kDCS)
1399 {
1400 baseFXSFolder = "";
1401 }
1402 else if (system == kHLT)
1403 {
1404 baseFXSFolder = "~/";
1405 }
1406
1407
1408 TString command = Form("scp -oPort=%d -2 %s@%s:%s%s %s/%s",
1409 fConfig->GetFXSPort(system),
1410 fConfig->GetFXSUser(system),
1411 fConfig->GetFXSHost(system),
1412 baseFXSFolder.Data(),
1413 fxsFileName,
36c99a6a 1414 GetShuttleTempDir(),
9e080f92 1415 localFileName);
1416
1417 AliDebug(2, Form("%s",command.Data()));
1418
4b95672b 1419 Bool_t result = (gSystem->Exec(command.Data()) == 0);
9e080f92 1420
4b95672b 1421 return result;
9e080f92 1422}
1423
1424//______________________________________________________________________________________________
9d733021 1425TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
1426{
1427// Get sources producing the condition file Id from file exchange servers
1428
1429 if (system == kDCS)
1430 {
1431 AliError("DCS system has only one source of data!");
1432 return NULL;
1433
1434 }
9e080f92 1435
1436 // check connection, in case connect
9d733021 1437 if (!Connect(system))
1438 {
1439 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
1440 return NULL;
9e080f92 1441 }
1442
9d733021 1443 TString sourceName = 0;
1444 if (system == kDAQ)
1445 {
1446 sourceName = "DAQsource";
1447 } else if (system == kHLT)
1448 {
1449 sourceName = "DDLnumbers";
1450 }
1451
d386d623 1452 TString sqlQueryStart = Form("select %s from %s where", sourceName.Data(), fConfig->GetFXSdbTable(system));
9e080f92 1453 TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1454 GetCurrentRun(), detector, id);
1455 TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1456
1457 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1458
1459 // Query execution
1460 TSQLResult* aResult;
9d733021 1461 aResult = fServer[system]->Query(sqlQuery);
9e080f92 1462 if (!aResult) {
9d733021 1463 Log(detector, Form("GetFileSources - Can't execute SQL query to %s database for id: %s",
1464 GetSystemName(system), id));
9e080f92 1465 return 0;
1466 }
1467
9d733021 1468 if (aResult->GetRowCount() == 0)
1469 {
9e080f92 1470 Log(detector,
9d733021 1471 Form("GetFileSources - No entry in %s FXS table for id: %s", GetSystemName(system), id));
9e080f92 1472 delete aResult;
1473 return 0;
1474 }
1475
1476 TSQLRow* aRow;
1477 TList *list = new TList();
1478 list->SetOwner(1);
1479
9d733021 1480 while ((aRow = aResult->Next()))
1481 {
9e080f92 1482
9d733021 1483 TString source(aRow->GetField(0), aRow->GetFieldLength(0));
1484 AliDebug(2, Form("%s = %s", sourceName.Data(), source.Data()));
1485 list->Add(new TObjString(source));
9e080f92 1486 delete aRow;
1487 }
9d733021 1488
9e080f92 1489 delete aResult;
1490
1491 return list;
2bb7b766 1492}
1493
1494//______________________________________________________________________________________________
9d733021 1495Bool_t AliShuttle::Connect(Int_t system)
2bb7b766 1496{
9d733021 1497// Connect to MySQL Server of the system's FXS MySQL databases
1498// DAQ Logbook, Shuttle Logbook and DAQ FXS db are on the same host
57f50b3c 1499
9d733021 1500 // check connection: if already connected return
1501 if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
57f50b3c 1502
9d733021 1503 TString dbHost, dbUser, dbPass, dbName;
57f50b3c 1504
9d733021 1505 if (system < 3) // FXS db servers
1506 {
1507 dbHost = Form("mysql://%s:%d", fConfig->GetFXSdbHost(system), fConfig->GetFXSdbPort(system));
1508 dbUser = fConfig->GetFXSdbUser(system);
1509 dbPass = fConfig->GetFXSdbPass(system);
1510 dbName = fConfig->GetFXSdbName(system);
1511 } else { // Run & Shuttle logbook servers
1512 // TODO Will the Shuttle logbook server be the same as the Run logbook server ???
1513 dbHost = Form("mysql://%s:%d", fConfig->GetDAQlbHost(), fConfig->GetDAQlbPort());
1514 dbUser = fConfig->GetDAQlbUser();
1515 dbPass = fConfig->GetDAQlbPass();
1516 dbName = fConfig->GetDAQlbDB();
1517 }
57f50b3c 1518
9d733021 1519 fServer[system] = TSQLServer::Connect(dbHost.Data(), dbUser.Data(), dbPass.Data());
1520 if (!fServer[system] || !fServer[system]->IsConnected()) {
1521 if(system < 3)
1522 {
1523 AliError(Form("Can't establish connection to FXS database for %s",
1524 AliShuttleInterface::GetSystemName(system)));
1525 } else {
1526 AliError("Can't establish connection to Run logbook.");
57f50b3c 1527 }
9d733021 1528 if(fServer[system]) delete fServer[system];
1529 return kFALSE;
2bb7b766 1530 }
57f50b3c 1531
9d733021 1532 // Get tables
1533 TSQLResult* aResult=0;
1534 switch(system){
1535 case kDAQ:
1536 aResult = fServer[kDAQ]->GetTables(dbName.Data());
1537 break;
1538 case kDCS:
1539 aResult = fServer[kDCS]->GetTables(dbName.Data());
1540 break;
1541 case kHLT:
1542 aResult = fServer[kHLT]->GetTables(dbName.Data());
1543 break;
1544 default:
1545 aResult = fServer[3]->GetTables(dbName.Data());
1546 break;
1547 }
1548
1549 delete aResult;
2bb7b766 1550 return kTRUE;
1551}
57f50b3c 1552
9e080f92 1553//______________________________________________________________________________________________
9d733021 1554Bool_t AliShuttle::UpdateTable()
9e080f92 1555{
9d733021 1556// Update FXS table filling time_processed field in all rows corresponding to current run and detector
9e080f92 1557
9d733021 1558 Bool_t result = kTRUE;
9e080f92 1559
9d733021 1560 for (UInt_t system=0; system<3; system++)
1561 {
1562 if(!fFXSCalled[system]) continue;
9e080f92 1563
9d733021 1564 // check connection, in case connect
1565 if (!Connect(system))
1566 {
1567 Log(fCurrentDetector, Form("UpdateTable - Couldn't connect to %s FXS database", GetSystemName(system)));
1568 result = kFALSE;
1569 continue;
9e080f92 1570 }
9e080f92 1571
9d733021 1572 TTimeStamp now; // now
1573
1574 // Loop on FXS list entries
1575 TIter iter(&fFXSlist[system]);
1576 TObjString *aFXSentry=0;
1577 while ((aFXSentry = dynamic_cast<TObjString*> (iter.Next())))
1578 {
1579 TString aFXSentrystr = aFXSentry->String();
1580 TObjArray *aFXSarray = aFXSentrystr.Tokenize("#!?!#");
1581 if (!aFXSarray || aFXSarray->GetEntries() != 2 )
1582 {
1583 Log(fCurrentDetector, Form("UpdateTable - error updating %s FXS entry. Check string: <%s>",
1584 GetSystemName(system), aFXSentrystr.Data()));
1585 if(aFXSarray) delete aFXSarray;
1586 result = kFALSE;
1587 continue;
1588 }
1589 const char* fileId = ((TObjString*) aFXSarray->At(0))->GetName();
1590 const char* source = ((TObjString*) aFXSarray->At(1))->GetName();
1591
1592 TString whereClause;
1593 if (system == kDAQ)
1594 {
1595 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
1596 GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
1597 }
1598 else if (system == kDCS)
1599 {
1600 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\";",
1601 GetCurrentRun(), fCurrentDetector.Data(), fileId);
1602 }
1603 else if (system == kHLT)
1604 {
1605 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\";",
1606 GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
1607 }
1608
1609 delete aFXSarray;
9e080f92 1610
9d733021 1611 TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
1612 now.GetSec(), whereClause.Data());
9e080f92 1613
9d733021 1614 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
9e080f92 1615
9d733021 1616 // Query execution
1617 TSQLResult* aResult;
1618 aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
1619 if (!aResult)
1620 {
1621 Log(fCurrentDetector, Form("UpdateTable - %s db: can't execute SQL query <%s>",
1622 GetSystemName(system), sqlQuery.Data()));
1623 result = kFALSE;
1624 continue;
1625 }
1626 delete aResult;
9e080f92 1627 }
9e080f92 1628 }
1629
9d733021 1630 return result;
9e080f92 1631}
57f50b3c 1632
2bb7b766 1633//______________________________________________________________________________________________
1634Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
1635{
e7f62f16 1636 //
1637 // Update Shuttle logbook filling detector or shuttle_done column
1638 // ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
1639 //
57f50b3c 1640
2bb7b766 1641 // check connection, in case connect
be48e3ea 1642 if(!Connect(3)){
2bb7b766 1643 Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook.");
1644 return kFALSE;
57f50b3c 1645 }
1646
2bb7b766 1647 TString detName(detector);
1648 TString setClause;
e7f62f16 1649 if(detName == "shuttle_done")
1650 {
2bb7b766 1651 setClause = "set shuttle_done=1";
e7f62f16 1652
1653 // Send the information to ML
1654 TMonaLisaText mlStatus("SHUTTLE_status", "Done");
1655
1656 TList mlList;
1657 mlList.Add(&mlStatus);
1658
1659 fMonaLisa->SendParameters(&mlList);
2bb7b766 1660 } else {
2bb7b766 1661 TString statusStr(status);
1662 if(statusStr.Contains("done", TString::kIgnoreCase) ||
1663 statusStr.Contains("failed", TString::kIgnoreCase)){
eba76848 1664 setClause = Form("set %s=\"%s\"", detector, status);
2bb7b766 1665 } else {
1666 Log("SHUTTLE",
1667 Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s",
1668 status, detector));
1669 return kFALSE;
1670 }
1671 }
57f50b3c 1672
2bb7b766 1673 TString whereClause = Form("where run=%d", GetCurrentRun());
1674
1675 TString sqlQuery = Form("update logbook_shuttle %s %s",
1676 setClause.Data(), whereClause.Data());
57f50b3c 1677
2bb7b766 1678 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1679
1680 // Query execution
1681 TSQLResult* aResult;
be48e3ea 1682 aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
2bb7b766 1683 if (!aResult) {
1684 Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data()));
1685 return kFALSE;
57f50b3c 1686 }
2bb7b766 1687 delete aResult;
57f50b3c 1688
1689 return kTRUE;
1690}
1691
1692//______________________________________________________________________________________________
2bb7b766 1693Int_t AliShuttle::GetCurrentRun() const
1694{
1695// Get current run from logbook entry
57f50b3c 1696
2bb7b766 1697 return fLogbookEntry ? fLogbookEntry->GetRun() : -1;
57f50b3c 1698}
1699
1700//______________________________________________________________________________________________
2bb7b766 1701UInt_t AliShuttle::GetCurrentStartTime() const
1702{
1703// get current start time
57f50b3c 1704
2bb7b766 1705 return fLogbookEntry ? fLogbookEntry->GetStartTime() : 0;
57f50b3c 1706}
1707
1708//______________________________________________________________________________________________
2bb7b766 1709UInt_t AliShuttle::GetCurrentEndTime() const
1710{
1711// get current end time from logbook entry
57f50b3c 1712
2bb7b766 1713 return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
57f50b3c 1714}
1715
b948db8d 1716//______________________________________________________________________________________________
1717void AliShuttle::Log(const char* detector, const char* message)
1718{
58bc3020 1719// Fill log string with a message
b948db8d 1720
36c99a6a 1721 void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
84090f85 1722 if (dir == NULL) {
36c99a6a 1723 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE)) {
1724 AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
84090f85 1725 return;
1726 }
b948db8d 1727
84090f85 1728 } else {
1729 gSystem->FreeDirectory(dir);
1730 }
b948db8d 1731
cb343cfd 1732 TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
e7f62f16 1733 if (GetCurrentRun() >= 0)
1734 toLog += Form("run %d - ", GetCurrentRun());
2bb7b766 1735 toLog += Form("%s", message);
1736
84090f85 1737 AliInfo(toLog.Data());
b948db8d 1738
84090f85 1739 TString fileName;
e7f62f16 1740 if (GetCurrentRun() >= 0)
1741 fileName.Form("%s/%s_%d.log", GetShuttleLogDir(), detector, GetCurrentRun());
1742 else
1743 fileName.Form("%s/%s.log", GetShuttleLogDir(), detector);
1744
84090f85 1745 gSystem->ExpandPathName(fileName);
1746
1747 ofstream logFile;
1748 logFile.open(fileName, ofstream::out | ofstream::app);
1749
1750 if (!logFile.is_open()) {
1751 AliError(Form("Could not open file %s", fileName.Data()));
1752 return;
1753 }
7bfb2090 1754
84090f85 1755 logFile << toLog.Data() << "\n";
b948db8d 1756
84090f85 1757 logFile.close();
b948db8d 1758}
2bb7b766 1759
2bb7b766 1760//______________________________________________________________________________________________
1761Bool_t AliShuttle::Collect(Int_t run)
1762{
eba76848 1763//
1764// Collects conditions data for all UNPROCESSED run written to DAQ LogBook in case of run = -1 (default)
1765// If a dedicated run is given this run is processed
1766//
1767// In operational mode, this is the Shuttle function triggered by the EOR signal.
1768//
2bb7b766 1769
eba76848 1770 if (run == -1)
1771 Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs");
1772 else
1773 Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run));
cb343cfd 1774
1775 SetLastAction("Starting");
2bb7b766 1776
1777 TString whereClause("where shuttle_done=0");
eba76848 1778 if (run != -1)
1779 whereClause += Form(" and run=%d", run);
2bb7b766 1780
1781 TObjArray shuttleLogbookEntries;
be48e3ea 1782 if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries))
1783 {
cb343cfd 1784 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
2bb7b766 1785 return kFALSE;
1786 }
1787
9e080f92 1788 if (shuttleLogbookEntries.GetEntries() == 0)
1789 {
1790 if (run == -1)
1791 Log("SHUTTLE","Collect - Found no UNPROCESSED runs in Shuttle logbook");
1792 else
1793 Log("SHUTTLE", Form("Collect - Run %d is already DONE "
1794 "or it does not exist in Shuttle logbook", run));
1795 return kTRUE;
1796 }
1797
be48e3ea 1798 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1799 fFirstUnprocessed[iDet] = kTRUE;
1800
fc5a4708 1801 if (run != -1)
be48e3ea 1802 {
1803 // query Shuttle logbook for earlier runs, check if some detectors are unprocessed,
1804 // flag them into fFirstUnprocessed array
1805 TString whereClause(Form("where shuttle_done=0 and run < %d", run));
1806 TObjArray tmpLogbookEntries;
1807 if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries))
1808 {
1809 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
1810 return kFALSE;
1811 }
1812
1813 TIter iter(&tmpLogbookEntries);
1814 AliShuttleLogbookEntry* anEntry = 0;
1815 while ((anEntry = dynamic_cast<AliShuttleLogbookEntry*> (iter.Next())))
1816 {
1817 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1818 {
1819 if (anEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
1820 {
1821 AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
1822 anEntry->GetRun(), GetDetName(iDet)));
1823 fFirstUnprocessed[iDet] = kFALSE;
1824 }
1825 }
1826
1827 }
1828
1829 }
1830
1831 if (!RetrieveConditionsData(shuttleLogbookEntries))
1832 {
cb343cfd 1833 Log("SHUTTLE", "Collect - Process of at least one run failed");
2bb7b766 1834 return kFALSE;
1835 }
1836
36c99a6a 1837 Log("SHUTTLE", "Collect - Requested run(s) successfully processed");
eba76848 1838 return kTRUE;
2bb7b766 1839}
1840
2bb7b766 1841//______________________________________________________________________________________________
1842Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries)
1843{
1844// Retrieve conditions data for all runs that aren't processed yet
1845
1846 Bool_t hasError = kFALSE;
1847
1848 TIter iter(&dateEntries);
1849 AliShuttleLogbookEntry* anEntry;
1850
1851 while ((anEntry = (AliShuttleLogbookEntry*) iter.Next())){
1852 if (!Process(anEntry)){
1853 hasError = kTRUE;
1854 }
4b95672b 1855
1856 // clean SHUTTLE temp directory
1857 TString command = Form("rm -f %s/*.shuttle", GetShuttleTempDir());
1858 gSystem->Exec(command.Data());
2bb7b766 1859 }
1860
1861 return hasError == kFALSE;
1862}
cb343cfd 1863
1864//______________________________________________________________________________________________
1865ULong_t AliShuttle::GetTimeOfLastAction() const
1866{
1867 ULong_t tmp;
36c99a6a 1868
cb343cfd 1869 fMonitoringMutex->Lock();
be48e3ea 1870
cb343cfd 1871 tmp = fLastActionTime;
36c99a6a 1872
cb343cfd 1873 fMonitoringMutex->UnLock();
36c99a6a 1874
cb343cfd 1875 return tmp;
1876}
1877
1878//______________________________________________________________________________________________
1879const TString AliShuttle::GetLastAction() const
1880{
1881 // returns a string description of the last action
1882
1883 TString tmp;
36c99a6a 1884
cb343cfd 1885 fMonitoringMutex->Lock();
1886
1887 tmp = fLastAction;
1888
1889 fMonitoringMutex->UnLock();
1890
36c99a6a 1891 return tmp;
cb343cfd 1892}
1893
1894//______________________________________________________________________________________________
1895void AliShuttle::SetLastAction(const char* action)
1896{
1897 // updates the monitoring variables
36c99a6a 1898
cb343cfd 1899 fMonitoringMutex->Lock();
36c99a6a 1900
cb343cfd 1901 fLastAction = action;
1902 fLastActionTime = time(0);
1903
1904 fMonitoringMutex->UnLock();
1905}
eba76848 1906
1907//______________________________________________________________________________________________
1908const char* AliShuttle::GetRunParameter(const char* param)
1909{
1910// returns run parameter read from DAQ logbook
1911
1912 if(!fLogbookEntry) {
1913 AliError("No logbook entry!");
1914 return 0;
1915 }
1916
1917 return fLogbookEntry->GetRunParameter(param);
1918}
57c1a579 1919
d386d623 1920//______________________________________________________________________________________________
1921AliCDBEntry* AliShuttle::GetFromOCDB(const AliCDBPath& path)
1922{
1923// returns obiect from OCDB valid for current run
1924
1925 AliCDBStorage *sto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
1926 if (!sto)
1927 {
1928 Log("SHUTTLE", "GetFromOCDB - Cannot activate main OCDB for query!");
1929 return 0;
1930 }
1931
1932 return dynamic_cast<AliCDBEntry*> (sto->Get(path, GetCurrentRun()));
1933}
1934
57c1a579 1935//______________________________________________________________________________________________
1936Bool_t AliShuttle::SendMail()
1937{
1938// sends a mail to the subdetector expert in case of preprocessor error
1939
36c99a6a 1940 void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
57c1a579 1941 if (dir == NULL)
1942 {
36c99a6a 1943 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
57c1a579 1944 {
36c99a6a 1945 AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
57c1a579 1946 return kFALSE;
1947 }
1948
1949 } else {
1950 gSystem->FreeDirectory(dir);
1951 }
1952
1953 TString bodyFileName;
36c99a6a 1954 bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
57c1a579 1955 gSystem->ExpandPathName(bodyFileName);
1956
1957 ofstream mailBody;
1958 mailBody.open(bodyFileName, ofstream::out);
1959
1960 if (!mailBody.is_open())
1961 {
1962 AliError(Form("Could not open mail body file %s", bodyFileName.Data()));
1963 return kFALSE;
1964 }
1965
1966 TString to="";
1967 TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
1968 TObjString *anExpert=0;
1969 while ((anExpert = (TObjString*) iterExperts.Next()))
1970 {
1971 to += Form("%s,", anExpert->GetName());
1972 }
1973 to.Remove(to.Length()-1);
909732f7 1974 AliDebug(2, Form("to: %s",to.Data()));
57c1a579 1975
36c99a6a 1976 // TODO this will be removed...
1977 if (to.Contains("not_yet_set")) {
1978 AliInfo("List of detector responsibles not yet set!");
1979 return kFALSE;
1980 }
1981
57c1a579 1982 TString cc="alberto.colla@cern.ch";
1983
1984 TString subject = Form("%s Shuttle preprocessor error in run %d !",
1985 fCurrentDetector.Data(), GetCurrentRun());
909732f7 1986 AliDebug(2, Form("subject: %s", subject.Data()));
57c1a579 1987
1988 TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
1989 body += Form("SHUTTLE just detected that your preprocessor "
36c99a6a 1990 "exited with ERROR state in run %d!!\n\n", GetCurrentRun());
57c1a579 1991 body += Form("Please check %s status on the web page asap!\n\n", fCurrentDetector.Data());
1992 body += Form("The last 10 lines of %s log file are following:\n\n");
1993
909732f7 1994 AliDebug(2, Form("Body begin: %s", body.Data()));
57c1a579 1995
1996 mailBody << body.Data();
1997 mailBody.close();
1998 mailBody.open(bodyFileName, ofstream::out | ofstream::app);
1999
9d733021 2000 TString logFileName = Form("%s/%s_%d.log", GetShuttleLogDir(), fCurrentDetector.Data(), GetCurrentRun());
57c1a579 2001 TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
2002 if (gSystem->Exec(tailCommand.Data()))
2003 {
2004 mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
2005 }
2006
2007 TString endBody = Form("------------------------------------------------------\n\n");
36c99a6a 2008 endBody += Form("In case of problems please contact the SHUTTLE core team.\n\n");
2009 endBody += "Please do not answer this message directly, it is automatically generated.\n\n";
57c1a579 2010 endBody += "Sincerely yours,\n\n \t\t\tthe SHUTTLE\n";
2011
909732f7 2012 AliDebug(2, Form("Body end: %s", endBody.Data()));
57c1a579 2013
2014 mailBody << endBody.Data();
2015
2016 mailBody.close();
2017
2018 // send mail!
2019 TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
2020 subject.Data(),
2021 cc.Data(),
2022 to.Data(),
2023 bodyFileName.Data());
909732f7 2024 AliDebug(2, Form("mail command: %s", mailCommand.Data()));
57c1a579 2025
2026 Bool_t result = gSystem->Exec(mailCommand.Data());
2027
2028 return result == 0;
2029}
d386d623 2030
2031//______________________________________________________________________________________________
2032void AliShuttle::SetShuttleTempDir(const char* tmpDir)
2033{
2034// sets Shuttle temp directory
2035
2036 fgkShuttleTempDir = gSystem->ExpandPathName(tmpDir);
2037}
2038
2039//______________________________________________________________________________________________
2040void AliShuttle::SetShuttleLogDir(const char* logDir)
2041{
2042// sets Shuttle log directory
2043
2044 fgkShuttleLogDir = gSystem->ExpandPathName(logDir);
2045}